From 13014885861f87bf9fd5a99afdfe3153cca852a5 Mon Sep 17 00:00:00 2001 From: Keith Sheppard Date: Fri, 15 Oct 2021 16:31:39 -0400 Subject: [PATCH] initial commit --- .gitignore | 112 + LICENSE | 21 + README.md | 125 + .../coco/hrnet/w32_256x192_adam_lr1e-3.yaml | 127 + .../coco/hrnet/w32_384x288_adam_lr1e-3.yaml | 127 + .../coco/hrnet/w48_256x192_adam_lr1e-3.yaml | 127 + .../coco/hrnet/w48_384x288_adam_lr1e-3.yaml | 127 + .../res101_256x192_d256x3_adam_lr1e-3.yaml | 83 + .../res101_384x288_d256x3_adam_lr1e-3.yaml | 83 + .../res152_256x192_d256x3_adam_lr1e-3.yaml | 83 + .../res152_384x288_d256x3_adam_lr1e-3.yaml | 83 + .../res50_256x192_d256x3_adam_lr1e-3.yaml | 83 + .../res50_384x288_d256x3_adam_lr1e-3.yaml | 83 + experiments/corner/corner_2020-06-30_01.yaml | 143 + .../fecalboli/fecalboli_2020-05-0-08.yaml | 148 + .../fecalboli/fecalboli_2020-06-19_01.yaml | 149 + .../fecalboli/fecalboli_2020-06-19_02.yaml | 149 + .../fecalboli/fecalboli_2020-06-19_03.yaml | 149 + .../fecalboli/fecalboli_2020-06-19_04.yaml | 149 + .../fecalboli/fecalboli_2020-06-19_05.yaml | 149 + .../fecalboli/fecalboli_2020-06-19_06.yaml | 149 + .../fecalboli/fecalboli_2020-06-19_07.yaml | 149 + .../fecalboli/fecalboli_2020-06-19_08.yaml | 149 + .../2019-05-23-param-search/mouse-pose-1.yaml | 147 + .../mouse-pose-10.yaml | 149 + .../mouse-pose-11.yaml | 149 + .../mouse-pose-12.yaml | 149 + .../mouse-pose-13.yaml | 147 + .../mouse-pose-14.yaml | 149 + .../2019-05-23-param-search/mouse-pose-2.yaml | 147 + .../2019-05-23-param-search/mouse-pose-3.yaml | 147 + .../2019-05-23-param-search/mouse-pose-4.yaml | 147 + .../2019-05-23-param-search/mouse-pose-5.yaml | 147 + .../2019-05-23-param-search/mouse-pose-6.yaml | 147 + .../2019-05-23-param-search/mouse-pose-7.yaml | 147 + .../2019-05-23-param-search/mouse-pose-8.yaml | 147 + .../2019-05-23-param-search/mouse-pose-9.yaml | 147 + .../2019-06-18-param-search/mp-conf1.yaml | 142 + .../2019-06-18-param-search/mp-conf10.yaml | 140 + .../2019-06-18-param-search/mp-conf11.yaml | 140 + .../2019-06-18-param-search/mp-conf2.yaml | 142 + .../2019-06-18-param-search/mp-conf3.yaml | 142 + .../2019-06-18-param-search/mp-conf4.yaml | 142 + .../2019-06-18-param-search/mp-conf5.yaml | 142 + .../2019-06-18-param-search/mp-conf6.yaml | 142 + .../2019-06-18-param-search/mp-conf7.yaml | 142 + .../2019-06-18-param-search/mp-conf8.yaml | 140 + .../2019-06-18-param-search/mp-conf9.yaml | 140 + .../2019-06-26-param-search/mp-conf1.yaml | 140 + .../2019-06-26-param-search/mp-conf2.yaml | 140 + .../2019-06-26-param-search/mp-conf3.yaml | 140 + .../2019-06-26-param-search/mp-conf4.yaml | 140 + .../2019-06-26-param-search/mp-conf5.yaml | 140 + .../2019-06-26-param-search/mp-conf6.yaml | 140 + .../2019-06-26-param-search/mp-conf7.yaml | 140 + .../hrnet/w32_256x256_full_mouse_pose.yaml | 148 + .../hrnet/w32_256x256_mouse_pose.yaml | 145 + .../mpii/hrnet/w32_256x256_adam_lr1e-3.yaml | 120 + .../mpii/hrnet/w48_256x256_adam_lr1e-3.yaml | 120 + .../res101_256x256_d256x3_adam_lr1e-3.yaml | 86 + .../res152_256x256_d256x3_adam_lr1e-3.yaml | 86 + .../res50_256x256_d256x3_adam_lr1e-3.yaml | 86 + experiments/multimouse/multimouse-1.yaml | 145 + experiments/multimouse/multimouse-2.yaml | 145 + experiments/multimouse/multimouse-3.yaml | 145 + experiments/multimouse/multimouse-4.yaml | 145 + experiments/multimouse/multimouse-5.yaml | 145 + experiments/multimouse/multimouse-6.yaml | 145 + .../multimouse/multimouse_2019-11-19_1.yaml | 146 + .../multimouse/multimouse_2019-11-19_10.yaml | 146 + 
.../multimouse/multimouse_2019-11-19_2.yaml | 146 + .../multimouse/multimouse_2019-11-19_3.yaml | 146 + .../multimouse/multimouse_2019-11-19_4.yaml | 146 + .../multimouse/multimouse_2019-11-19_5.yaml | 146 + .../multimouse/multimouse_2019-11-19_6.yaml | 146 + .../multimouse/multimouse_2019-11-19_7.yaml | 146 + .../multimouse/multimouse_2019-11-19_8.yaml | 146 + .../multimouse/multimouse_2019-11-19_9.yaml | 146 + .../multimouse/multimouse_2019-12-19_1.yaml | 147 + .../multimouse/multimouse_2019-12-19_2.yaml | 147 + .../multimouse/multimouse_2019-12-19_3.yaml | 148 + .../multimouse/multimouse_2019-12-19_4.yaml | 149 + .../multimouse/multimouse_2019-12-19_5.yaml | 148 + .../multimouse/multimouse_2019-12-19_6.yaml | 148 + .../multimouse/multimouse_2019-12-19_7.yaml | 149 + .../multimouse/multimouse_2019-12-19_8.yaml | 149 + .../multimouse/multimouse_2019-12-31_1.yaml | 151 + .../multimouse/multimouse_2020-01-15_1.yaml | 152 + .../multimouse/multimouse_2020-01-17_01.yaml | 152 + .../multimouse/multimouse_2020-01-17_02.yaml | 152 + .../multimouse/multimouse_2020-01-17_03.yaml | 152 + .../multimouse/multimouse_2020-01-17_04.yaml | 152 + .../multimouse/multimouse_2020-01-17_05.yaml | 152 + .../multimouse/multimouse_2020-01-17_06.yaml | 152 + .../multimouse/multimouse_2020-01-17_07.yaml | 152 + .../multimouse/multimouse_2020-01-17_08.yaml | 152 + .../multimouse/multimouse_2020-01-17_09.yaml | 152 + .../multimouse/multimouse_2020-01-17_10.yaml | 152 + .../multimouse/multimouse_2020-01-17_11.yaml | 152 + .../multimouse/multimouse_2020-01-17_12.yaml | 152 + .../multimouse/multimouse_2020-01-17_13.yaml | 152 + .../multimouse/multimouse_2020-01-17_14.yaml | 152 + .../multimouse/multimouse_2020-01-17_15.yaml | 152 + .../multimouse/multimouse_2020-01-17_16.yaml | 152 + .../multimouse/multimouse_2020-01-21_01.yaml | 152 + .../multimouse/multimouse_2020-01-21_02.yaml | 152 + .../multimouse/multimouse_2020-01-21_03.yaml | 152 + .../multimouse/multimouse_2020-01-21_04.yaml | 152 + .../multimouse/multimouse_2020-01-21_05.yaml | 152 + .../multimouse/multimouse_2020-01-21_06.yaml | 152 + .../multimouse/multimouse_2020-01-21_07.yaml | 152 + .../multimouse/multimouse_2020-01-21_08.yaml | 152 + .../multimouse/multimouse_2020-01-21_09.yaml | 152 + .../multimouse/multimouse_2020-01-22_01.yaml | 152 + .../multimouse/multimouse_2020-01-22_02.yaml | 152 + .../multimouse/multimouse_2020-01-22_03.yaml | 152 + .../multimouse/multimouse_2020-01-22_04.yaml | 152 + .../multimouse/multimouse_2020-01-22_05.yaml | 152 + .../multimouse/multimouse_2020-01-22_06.yaml | 152 + .../multimouse/multimouse_2020-01-22_07.yaml | 152 + .../multimouse/multimouse_2020-01-22_08.yaml | 152 + .../multimouse/multimouse_2020-01-22_09.yaml | 152 + .../multimouse/multimouse_2020-01-22_10.yaml | 152 + .../multimouse/multimouse_2020-01-22_11.yaml | 152 + .../multimouse/multimouse_2020-01-22_12.yaml | 152 + .../multimouse/multimouse_2020-01-30_01.yaml | 148 + .../multimouse/multimouse_2020-01-30_02.yaml | 152 + .../multimouse/multimouse_2020-01-30_03.yaml | 148 + .../multimouse/multimouse_2020-01-30_04.yaml | 148 + .../multimouse/multimouse_2020-01-30_05.yaml | 152 + .../multimouse/multimouse_2020-01-30_06.yaml | 152 + .../multimouse/multimouse_2020-01-30_07.yaml | 152 + .../multimouse/multimouse_2020-02-03_01.yaml | 147 + .../multimouse/multimouse_2020-02-03_02.yaml | 147 + .../multimouse/multimouse_2020-02-03_03.yaml | 147 + .../multimouse/multimouse_2020-02-03_04.yaml | 147 + .../multimouse/multimouse_2020-02-03_05.yaml | 151 + 
.../multimouse/multimouse_2020-02-03_06.yaml | 151 + .../multimouse/multimouse_2020-02-03_07.yaml | 151 + .../multimouse/multimouse_2020-02-03_08.yaml | 151 + .../multimouse/multimouse_2020-02-03_09.yaml | 151 + .../multimouse/multimouse_2020-02-03_10.yaml | 151 + .../multimouse/multimouse_2020-02-03_11.yaml | 151 + .../multimouse/multimouse_2020-02-03_12.yaml | 151 + .../multimouse/multimouse_2020-02-10_01.yaml | 151 + .../multimouse/multimouse_2020-02-10_02.yaml | 151 + .../multimouse/multimouse_2020-02-10_03.yaml | 151 + experiments/objseg/objseg.yaml | 134 + figures/hrnet.png | Bin 0 -> 29707 bytes lib/Makefile | 4 + lib/config/__init__.py | 9 + lib/config/default.py | 181 + lib/config/models.py | 58 + lib/core/assocembedfunc.py | 155 + lib/core/assocembedloss.py | 306 + lib/core/cornerfunction.py | 367 + lib/core/evaluate.py | 77 + lib/core/fecalbolifunc.py | 161 + lib/core/focalloss.py | 65 + lib/core/function.py | 303 + lib/core/inference.py | 79 + lib/core/loss.py | 88 + lib/core/segfunction.py | 124 + lib/dataset/JointsDataset.py | 289 + lib/dataset/OpenFieldObjDataset.py | 163 + lib/dataset/__init__.py | 13 + lib/dataset/coco.py | 445 ++ lib/dataset/cornerdataset.py | 271 + lib/dataset/fecalbolidata.py | 301 + lib/dataset/hdf5mousepose.py | 191 + lib/dataset/mpii.py | 181 + lib/dataset/multimousepose.py | 399 + lib/dataset/simplepointdata.py | 283 + lib/models/__init__.py | 16 + lib/models/pose_hrnet.py | 639 ++ lib/models/pose_resnet.py | 271 + lib/nms/__init__.py | 0 lib/nms/cpu_nms.pyx | 71 + lib/nms/gpu_nms.cu | 7080 +++++++++++++++++ lib/nms/gpu_nms.hpp | 2 + lib/nms/gpu_nms.pyx | 34 + lib/nms/nms.py | 180 + lib/nms/nms_kernel.cu | 143 + lib/nms/setup_linux.py | 141 + lib/utils/__init__.py | 0 lib/utils/assocembedutil.py | 376 + lib/utils/transforms.py | 121 + lib/utils/utils.py | 204 + lib/utils/vis.py | 141 + lib/utils/xform.py | 69 + lib/utils/zipreader.py | 70 + requirements.txt | 19 + samplevids/samplevidsbatch.sh | 72 + test-multi-mouse-pose.sh | 166 + tools/_init_paths.py | 27 + tools/addpixelunits.py | 66 + tools/extractframes.py | 108 + tools/gathercvatframes.py | 114 + tools/infercorners.py | 270 + tools/inferfecalbolicount.py | 245 + tools/inferfecalbolicountbatch.py | 131 + tools/infermousepose.py | 153 + tools/infermultimousepose.py | 528 ++ tools/inferobjects.py | 267 + tools/listcvatnetids.py | 48 + tools/mousetrain.py | 259 + tools/ofobjecttrain.py | 194 + tools/rendercvat.py | 86 + tools/rendervidoverlay.py | 360 + tools/sampleframes.py | 201 + tools/testcornermodel.py | 222 + tools/testfecalboli.py | 251 + tools/testfecalbolidata.py | 121 + tools/testmouseposemodel.py | 250 + tools/testmultimousedata.py | 137 + tools/testmultimouseinference.py | 377 + tools/trainfecalboli.py | 226 + tools/trainmultimouse.py | 267 + tools/trainsimplepoint.py | 232 + vm/corner-detection-2021-08-25.def | 36 + vm/deep-hres-net-2019-06-28.def | 29 + vm/extract-frames.sh | 65 + vm/infer-corners-batch.sh | 93 + vm/infer-multi-poseest-batch.sh | 92 + vm/infer-obj-seg.sh | 65 + vm/infer-poseest-batch.sh | 92 + vm/multi-mouse-pose-2019-11-04.def | 39 + vm/multi-mouse-pose-2020-02-12.def | 39 + vm/obj-seg-2019-07-15.def | 29 + vm/obj-seg-2019-07-16.def | 31 + vm/obj-seg-2019-07-17.def | 31 + vm/train-fboli-detection.sh | 24 + vm/train-multi-mouse-pose.sh | 21 + 233 files changed, 41169 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml create mode 100644 
experiments/coco/hrnet/w32_384x288_adam_lr1e-3.yaml create mode 100644 experiments/coco/hrnet/w48_256x192_adam_lr1e-3.yaml create mode 100644 experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml create mode 100755 experiments/coco/resnet/res101_256x192_d256x3_adam_lr1e-3.yaml create mode 100755 experiments/coco/resnet/res101_384x288_d256x3_adam_lr1e-3.yaml create mode 100755 experiments/coco/resnet/res152_256x192_d256x3_adam_lr1e-3.yaml create mode 100755 experiments/coco/resnet/res152_384x288_d256x3_adam_lr1e-3.yaml create mode 100755 experiments/coco/resnet/res50_256x192_d256x3_adam_lr1e-3.yaml create mode 100755 experiments/coco/resnet/res50_384x288_d256x3_adam_lr1e-3.yaml create mode 100644 experiments/corner/corner_2020-06-30_01.yaml create mode 100644 experiments/fecalboli/fecalboli_2020-05-0-08.yaml create mode 100644 experiments/fecalboli/fecalboli_2020-06-19_01.yaml create mode 100644 experiments/fecalboli/fecalboli_2020-06-19_02.yaml create mode 100644 experiments/fecalboli/fecalboli_2020-06-19_03.yaml create mode 100644 experiments/fecalboli/fecalboli_2020-06-19_04.yaml create mode 100644 experiments/fecalboli/fecalboli_2020-06-19_05.yaml create mode 100644 experiments/fecalboli/fecalboli_2020-06-19_06.yaml create mode 100644 experiments/fecalboli/fecalboli_2020-06-19_07.yaml create mode 100644 experiments/fecalboli/fecalboli_2020-06-19_08.yaml create mode 100644 experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-1.yaml create mode 100644 experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-10.yaml create mode 100644 experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-11.yaml create mode 100644 experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-12.yaml create mode 100644 experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-13.yaml create mode 100644 experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-14.yaml create mode 100644 experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-2.yaml create mode 100644 experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-3.yaml create mode 100644 experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-4.yaml create mode 100644 experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-5.yaml create mode 100644 experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-6.yaml create mode 100644 experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-7.yaml create mode 100644 experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-8.yaml create mode 100644 experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-9.yaml create mode 100644 experiments/hdf5mouse/2019-06-18-param-search/mp-conf1.yaml create mode 100644 experiments/hdf5mouse/2019-06-18-param-search/mp-conf10.yaml create mode 100644 experiments/hdf5mouse/2019-06-18-param-search/mp-conf11.yaml create mode 100644 experiments/hdf5mouse/2019-06-18-param-search/mp-conf2.yaml create mode 100644 experiments/hdf5mouse/2019-06-18-param-search/mp-conf3.yaml create mode 100644 experiments/hdf5mouse/2019-06-18-param-search/mp-conf4.yaml create mode 100644 experiments/hdf5mouse/2019-06-18-param-search/mp-conf5.yaml create mode 100644 experiments/hdf5mouse/2019-06-18-param-search/mp-conf6.yaml create mode 100644 experiments/hdf5mouse/2019-06-18-param-search/mp-conf7.yaml create mode 100644 experiments/hdf5mouse/2019-06-18-param-search/mp-conf8.yaml create mode 100644 experiments/hdf5mouse/2019-06-18-param-search/mp-conf9.yaml create mode 100644 experiments/hdf5mouse/2019-06-26-param-search/mp-conf1.yaml create mode 100644 
experiments/hdf5mouse/2019-06-26-param-search/mp-conf2.yaml create mode 100644 experiments/hdf5mouse/2019-06-26-param-search/mp-conf3.yaml create mode 100644 experiments/hdf5mouse/2019-06-26-param-search/mp-conf4.yaml create mode 100644 experiments/hdf5mouse/2019-06-26-param-search/mp-conf5.yaml create mode 100644 experiments/hdf5mouse/2019-06-26-param-search/mp-conf6.yaml create mode 100644 experiments/hdf5mouse/2019-06-26-param-search/mp-conf7.yaml create mode 100644 experiments/hdf5mouse/hrnet/w32_256x256_full_mouse_pose.yaml create mode 100644 experiments/hdf5mouse/hrnet/w32_256x256_mouse_pose.yaml create mode 100644 experiments/mpii/hrnet/w32_256x256_adam_lr1e-3.yaml create mode 100644 experiments/mpii/hrnet/w48_256x256_adam_lr1e-3.yaml create mode 100755 experiments/mpii/resnet/res101_256x256_d256x3_adam_lr1e-3.yaml create mode 100755 experiments/mpii/resnet/res152_256x256_d256x3_adam_lr1e-3.yaml create mode 100755 experiments/mpii/resnet/res50_256x256_d256x3_adam_lr1e-3.yaml create mode 100644 experiments/multimouse/multimouse-1.yaml create mode 100644 experiments/multimouse/multimouse-2.yaml create mode 100644 experiments/multimouse/multimouse-3.yaml create mode 100644 experiments/multimouse/multimouse-4.yaml create mode 100644 experiments/multimouse/multimouse-5.yaml create mode 100644 experiments/multimouse/multimouse-6.yaml create mode 100644 experiments/multimouse/multimouse_2019-11-19_1.yaml create mode 100644 experiments/multimouse/multimouse_2019-11-19_10.yaml create mode 100644 experiments/multimouse/multimouse_2019-11-19_2.yaml create mode 100644 experiments/multimouse/multimouse_2019-11-19_3.yaml create mode 100644 experiments/multimouse/multimouse_2019-11-19_4.yaml create mode 100644 experiments/multimouse/multimouse_2019-11-19_5.yaml create mode 100644 experiments/multimouse/multimouse_2019-11-19_6.yaml create mode 100644 experiments/multimouse/multimouse_2019-11-19_7.yaml create mode 100644 experiments/multimouse/multimouse_2019-11-19_8.yaml create mode 100644 experiments/multimouse/multimouse_2019-11-19_9.yaml create mode 100644 experiments/multimouse/multimouse_2019-12-19_1.yaml create mode 100644 experiments/multimouse/multimouse_2019-12-19_2.yaml create mode 100644 experiments/multimouse/multimouse_2019-12-19_3.yaml create mode 100644 experiments/multimouse/multimouse_2019-12-19_4.yaml create mode 100644 experiments/multimouse/multimouse_2019-12-19_5.yaml create mode 100644 experiments/multimouse/multimouse_2019-12-19_6.yaml create mode 100644 experiments/multimouse/multimouse_2019-12-19_7.yaml create mode 100644 experiments/multimouse/multimouse_2019-12-19_8.yaml create mode 100644 experiments/multimouse/multimouse_2019-12-31_1.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-15_1.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-17_01.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-17_02.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-17_03.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-17_04.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-17_05.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-17_06.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-17_07.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-17_08.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-17_09.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-17_10.yaml create mode 100644 
experiments/multimouse/multimouse_2020-01-17_11.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-17_12.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-17_13.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-17_14.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-17_15.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-17_16.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-21_01.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-21_02.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-21_03.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-21_04.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-21_05.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-21_06.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-21_07.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-21_08.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-21_09.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-22_01.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-22_02.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-22_03.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-22_04.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-22_05.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-22_06.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-22_07.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-22_08.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-22_09.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-22_10.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-22_11.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-22_12.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-30_01.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-30_02.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-30_03.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-30_04.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-30_05.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-30_06.yaml create mode 100644 experiments/multimouse/multimouse_2020-01-30_07.yaml create mode 100644 experiments/multimouse/multimouse_2020-02-03_01.yaml create mode 100644 experiments/multimouse/multimouse_2020-02-03_02.yaml create mode 100644 experiments/multimouse/multimouse_2020-02-03_03.yaml create mode 100644 experiments/multimouse/multimouse_2020-02-03_04.yaml create mode 100644 experiments/multimouse/multimouse_2020-02-03_05.yaml create mode 100644 experiments/multimouse/multimouse_2020-02-03_06.yaml create mode 100644 experiments/multimouse/multimouse_2020-02-03_07.yaml create mode 100644 experiments/multimouse/multimouse_2020-02-03_08.yaml create mode 100644 experiments/multimouse/multimouse_2020-02-03_09.yaml create mode 100644 experiments/multimouse/multimouse_2020-02-03_10.yaml create mode 100644 experiments/multimouse/multimouse_2020-02-03_11.yaml create mode 100644 experiments/multimouse/multimouse_2020-02-03_12.yaml create mode 100644 experiments/multimouse/multimouse_2020-02-10_01.yaml create mode 100644 experiments/multimouse/multimouse_2020-02-10_02.yaml create mode 100644 experiments/multimouse/multimouse_2020-02-10_03.yaml create mode 100644 
experiments/objseg/objseg.yaml create mode 100644 figures/hrnet.png create mode 100755 lib/Makefile create mode 100644 lib/config/__init__.py create mode 100644 lib/config/default.py create mode 100644 lib/config/models.py create mode 100644 lib/core/assocembedfunc.py create mode 100644 lib/core/assocembedloss.py create mode 100644 lib/core/cornerfunction.py create mode 100644 lib/core/evaluate.py create mode 100644 lib/core/fecalbolifunc.py create mode 100644 lib/core/focalloss.py create mode 100755 lib/core/function.py create mode 100644 lib/core/inference.py create mode 100644 lib/core/loss.py create mode 100644 lib/core/segfunction.py create mode 100755 lib/dataset/JointsDataset.py create mode 100644 lib/dataset/OpenFieldObjDataset.py create mode 100644 lib/dataset/__init__.py create mode 100755 lib/dataset/coco.py create mode 100644 lib/dataset/cornerdataset.py create mode 100644 lib/dataset/fecalbolidata.py create mode 100644 lib/dataset/hdf5mousepose.py create mode 100644 lib/dataset/mpii.py create mode 100644 lib/dataset/multimousepose.py create mode 100644 lib/dataset/simplepointdata.py create mode 100644 lib/models/__init__.py create mode 100644 lib/models/pose_hrnet.py create mode 100644 lib/models/pose_resnet.py create mode 100644 lib/nms/__init__.py create mode 100644 lib/nms/cpu_nms.pyx create mode 100644 lib/nms/gpu_nms.cu create mode 100644 lib/nms/gpu_nms.hpp create mode 100644 lib/nms/gpu_nms.pyx create mode 100644 lib/nms/nms.py create mode 100644 lib/nms/nms_kernel.cu create mode 100644 lib/nms/setup_linux.py create mode 100644 lib/utils/__init__.py create mode 100644 lib/utils/assocembedutil.py create mode 100644 lib/utils/transforms.py create mode 100644 lib/utils/utils.py create mode 100755 lib/utils/vis.py create mode 100644 lib/utils/xform.py create mode 100644 lib/utils/zipreader.py create mode 100644 requirements.txt create mode 100755 samplevids/samplevidsbatch.sh create mode 100755 test-multi-mouse-pose.sh create mode 100644 tools/_init_paths.py create mode 100644 tools/addpixelunits.py create mode 100644 tools/extractframes.py create mode 100644 tools/gathercvatframes.py create mode 100644 tools/infercorners.py create mode 100644 tools/inferfecalbolicount.py create mode 100644 tools/inferfecalbolicountbatch.py create mode 100644 tools/infermousepose.py create mode 100644 tools/infermultimousepose.py create mode 100644 tools/inferobjects.py create mode 100644 tools/listcvatnetids.py create mode 100755 tools/mousetrain.py create mode 100644 tools/ofobjecttrain.py create mode 100644 tools/rendercvat.py create mode 100644 tools/rendervidoverlay.py create mode 100644 tools/sampleframes.py create mode 100644 tools/testcornermodel.py create mode 100644 tools/testfecalboli.py create mode 100644 tools/testfecalbolidata.py create mode 100644 tools/testmouseposemodel.py create mode 100644 tools/testmultimousedata.py create mode 100644 tools/testmultimouseinference.py create mode 100644 tools/trainfecalboli.py create mode 100644 tools/trainmultimouse.py create mode 100644 tools/trainsimplepoint.py create mode 100644 vm/corner-detection-2021-08-25.def create mode 100644 vm/deep-hres-net-2019-06-28.def create mode 100755 vm/extract-frames.sh create mode 100755 vm/infer-corners-batch.sh create mode 100755 vm/infer-multi-poseest-batch.sh create mode 100755 vm/infer-obj-seg.sh create mode 100755 vm/infer-poseest-batch.sh create mode 100644 vm/multi-mouse-pose-2019-11-04.def create mode 100644 vm/multi-mouse-pose-2020-02-12.def create mode 100644 vm/obj-seg-2019-07-15.def 
create mode 100644 vm/obj-seg-2019-07-16.def create mode 100644 vm/obj-seg-2019-07-17.def create mode 100755 vm/train-fboli-detection.sh create mode 100755 vm/train-multi-mouse-pose.sh diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4dff066 --- /dev/null +++ b/.gitignore @@ -0,0 +1,112 @@ +# IntelliJ project files +.idea +*.iml +out +gen + +### Vim template +[._]*.s[a-w][a-z] +[._]s[a-w][a-z] +*.un~ +Session.vim +.netrwhist +*~ + +### IPythonNotebook template +# Temporary data +.ipynb_checkpoints/ + +### Python template +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +#lib/ +#lib64/ +parts/ +sdist/ +var/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +.dirconf +nosetests.xml +coverage.xml +*,cover + +# Translations +*.mo +*.pot + +# Django stuff: +*.log + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +*.ipynb +*.params +*.json +.vscode/ + +lib/pycocotools/_mask.c +lib/nms/cpu_nms.c + +output/* +output-*/* +models/* +log/* +log-*/* +data/* +external/ +temp*/ +testfbdata/ +fecal-boli-image-batch4/ +model-archive/ + +draws/ +plot/ + +*.avi +*.simg +*.sif +*.h5 +runs/ + +image-out*/ +sampled_frames*/ +sandbox/ +hard_frames/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..fc86f0e --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Leo Xiao, 2021 KumarLabJax + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..7111504 --- /dev/null +++ b/README.md @@ -0,0 +1,125 @@ +# Mouse Pose HR Net + +This repository is a forked and significantly modified version of the [official HRNet repository](https://github.com/leoxiaobin/deep-high-resolution-net.pytorch) in order to support mouse pose inference. This repository contains two main approaches for inferring mouse pose. Firstly we have a single mouse pose inference script: `tools/infermousepose.py`. 
The pose output for single mouse pose is currently used by our [gait analysis system](https://github.com/KumarLabJax/gaitanalysis) to extract the gait metrics that we analyze. We have also implemented multi-mouse pose estimation. The entry point for multi-mouse pose is the `tools/infermultimousepose.py` script. We describe these tools and others in more detail below:
+
+## Tools
+
+* `tools/addpixelunits.py`: adds pixel-to-centimeter conversion metadata to the pose file
+* `tools/infercorners.py`: this script performs corner detection, which we use to convert pixel space to real-world physical space
+* `tools/inferfecalbolicount.py`: this script locates fecal boli in the open field and provides minute-by-minute counts in the output
+* `tools/infermousepose.py`: we use this script to infer single mouse pose for every frame in a video
+* `tools/infermultimousepose.py`: we use this script to infer multi-mouse pose for every frame in a video
+* `tools/mousetrain.py`: our script for training the neural network on single mouse pose
+* `tools/testcornermodel.py`: script for testing the corner detection model and printing accuracy statistics
+* `tools/testfecalboli.py`: script for testing the fecal boli detection model and printing accuracy statistics
+* `tools/testmouseposemodel.py`: script for testing the single mouse pose model and printing accuracy statistics
+* `tools/testmultimouseinference.py`: script for testing the multi-mouse model and printing accuracy statistics
+
+All of these tools provide command-line help when run like: `python3 tools/scriptname.py --help`. Additionally, most include comments in the script source code which show example invocations.
+
+## Installation
+
+Before starting, make sure you have `python3` installed. This code has been developed and tested on `python 3.8.10`. The recommended approach to installing dependencies is to use a virtual environment, like:
+
+    python3 -m venv mousepose-venv
+    source mousepose-venv/bin/activate
+
+    # now switch to the pose repo dir and install requirements
+    cd $MOUSEPOSE_REPO_DIR
+    pip3 install -r requirements.txt
+
+Note that we will also be providing prebuilt Singularity VMs to simplify this process.
+
+## Pose File Formats
+
+The following describes our HDF5 pose file formats. `tools/infermousepose.py` generates the v2 format for single mouse pose and `tools/infermultimousepose.py` generates the v3 format.
+
+### Single-Mouse Pose Estimation v2 Format
+
+Each video has a corresponding HDF5 file that contains pose estimation coordinates and confidences. These files have the same name as the corresponding video except that ".avi" is replaced with "_pose_est_v2.h5".
+
+Each HDF5 file contains two datasets:
+
+* "poseest/points":
+    this is a dataset with size (#frames x #keypoints x 2), where #keypoints is 12 following the indexing scheme shown below and the last dimension of size 2 is used to hold the pixel (x, y) position for the respective frame # and keypoint #
+    the datatype is a 16-bit unsigned integer
+* "poseest/confidence":
+    this dataset has size (#frames x #keypoints) and assigns a 0-1 confidence value to each of the 12 points (sometimes the confidence goes slightly higher than 1). I tend to threshold at 0.3 as being "very low confidence". When the mouse is not in the arena, almost all confidence values should be < 0.3.
+    the datatype is a 32-bit floating point
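+
+As a minimal sketch of reading these two datasets (assuming the `h5py` package is available; the file name here is hypothetical):
+
+    import h5py
+
+    NOSE_INDEX = 0  # keypoint indexes are listed below
+
+    with h5py.File('example_video_pose_est_v2.h5', 'r') as pose_file:
+        points = pose_file['poseest/points'][:]          # (#frames, 12, 2) uint16, (x, y)
+        confidence = pose_file['poseest/confidence'][:]  # (#frames, 12) float32
+
+    # keep nose points only for frames where the nose clears the 0.3 threshold
+    nose_xy = points[confidence[:, NOSE_INDEX] > 0.3, NOSE_INDEX, :]
+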
+The "poseest" group can have the following attributes attached:
+
+* "cm_per_pixel" (optional):
+    defines how many centimeters a pixel of open field represents
+    the datatype is a 32-bit floating point scalar
+* "cm_per_pixel_source" (optional):
+    defines how the "cm_per_pixel" value was set. The value will be one of "corner_detection", "manually_set" or "default_alignment"
+    the datatype is a string scalar
+
+The 12 point indexes have the following mapping to mouse body parts:
+
+* NOSE_INDEX = 0
+* LEFT_EAR_INDEX = 1
+* RIGHT_EAR_INDEX = 2
+* BASE_NECK_INDEX = 3
+* LEFT_FRONT_PAW_INDEX = 4
+* RIGHT_FRONT_PAW_INDEX = 5
+* CENTER_SPINE_INDEX = 6
+* LEFT_REAR_PAW_INDEX = 7
+* RIGHT_REAR_PAW_INDEX = 8
+* BASE_TAIL_INDEX = 9
+* MID_TAIL_INDEX = 10
+* TIP_TAIL_INDEX = 11
+
+### Multi-Mouse Pose Estimation v3 Format
+
+Each video has a corresponding HDF5 file that contains pose estimation coordinates and confidences. These files have the same name as the corresponding video except that ".avi" is replaced with "_pose_est_v3.h5".
+
+Several of the datasets below have a dimension of length "maximum # instances". This is because the instance count can vary over time in a video, either because mice are added or removed or because of an error in inference. Since each frame has its own instance count, you must consult the "poseest/instance_count" dataset to determine the number of valid instances per frame.
+
+Each HDF5 file contains the following datasets:
+
+* "poseest/points":
+    this is a dataset with size (#frames x maximum # instances x #keypoints x 2), where #keypoints is 12 following the indexing scheme shown below and the last dimension of size 2 is used to hold the pixel (y, x) position for the respective frame # and keypoint #
+    the datatype is a 16-bit unsigned integer
+* "poseest/confidence":
+    this dataset has size (#frames x maximum # instances x #keypoints) and assigns a confidence value to each of the 12 points. Values of 0 indicate a missing point. Anything higher than 0 indicates a valid point, so in that sense this dataset can be treated as binary.
+    the datatype is a 32-bit floating point
+* "poseest/instance_count":
+    this dataset has size (#frames) and gives the instance count for every frame (this can change when mice are added and removed, or if inference fails for some frames)
+    the datatype is an 8-bit unsigned integer
+* "poseest/instance_embedding":
+    most applications can ignore this dataset. This is a dataset with size (#frames x maximum # instances x #keypoints), where #keypoints is 12 following the indexing scheme shown below. This dataset contains the instance embedding for the respective instance at the respective frame and point.
+    the datatype is a 32-bit floating point
+* "poseest/instance_track_id":
+    this is a dataset with size (#frames x maximum # instances) and contains the instance_track_id for each instance index on a per-frame basis.
+
+The "poseest" group can have the following attributes attached:
+
+* "cm_per_pixel" (optional):
+    defines how many centimeters a pixel of open field represents
+    the datatype is a 32-bit floating point scalar
+* "cm_per_pixel_source" (optional):
+    defines how the "cm_per_pixel" value was set. The value will be one of "corner_detection", "manually_set" or "default_alignment"
+    the datatype is a string scalar
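+
+The sketch below illustrates how "poseest/instance_count" gates the valid instances when reading a v3 file (again assuming `h5py`; the file name is hypothetical):
+
+    import h5py
+
+    with h5py.File('example_video_pose_est_v3.h5', 'r') as pose_file:
+        points = pose_file['poseest/points'][:]                  # (#frames, max instances, 12, 2) uint16, (y, x)
+        instance_count = pose_file['poseest/instance_count'][:]  # (#frames,) uint8
+        cm_per_pixel = pose_file['poseest'].attrs.get('cm_per_pixel')  # None when the attribute is absent
+
+    # only the first instance_count[frame] instances are valid in any given frame
+    frame = 0
+    valid_points = points[frame, :instance_count[frame]]  # shape: (instance count, 12, 2)
+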
+The 12 keypoint indexes have the following mapping to mouse body parts:
+
+* NOSE_INDEX = 0
+* LEFT_EAR_INDEX = 1
+* RIGHT_EAR_INDEX = 2
+* BASE_NECK_INDEX = 3
+* LEFT_FRONT_PAW_INDEX = 4
+* RIGHT_FRONT_PAW_INDEX = 5
+* CENTER_SPINE_INDEX = 6
+* LEFT_REAR_PAW_INDEX = 7
+* RIGHT_REAR_PAW_INDEX = 8
+* BASE_TAIL_INDEX = 9
+* MID_TAIL_INDEX = 10
+* TIP_TAIL_INDEX = 11
+
+## Licensing
+
+This code is released under the MIT license.
+
+The data from the associated paper that were used for training models are released on Zenodo under a Non-Commercial license. diff --git a/experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml b/experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml new file mode 100644 index 0000000..16854cf --- /dev/null +++ b/experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml @@ -0,0 +1,127 @@ +AUTO_RESUME: true +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,1,2,3) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: true + DATASET: 'coco' + DATA_FORMAT: jpg + FLIP: true + NUM_JOINTS_HALF_BODY: 8 + PROB_HALF_BODY: 0.3 + ROOT: 'data/coco/' + ROT_FACTOR: 45 + SCALE_FACTOR: 0.35 + 
TEST_SET: 'val2017' + TRAIN_SET: 'train2017' +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 17 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 288 + - 384 + HEATMAP_SIZE: + - 72 + - 96 + SIGMA: 3 + EXTRA: + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 210 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' + BBOX_THRE: 1.0 + IMAGE_THRE: 0.0 + IN_VIS_THRE: 0.2 + MODEL_FILE: '' + NMS_THRE: 1.0 + OKS_THRE: 0.9 + USE_GT_BBOX: true + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/coco/hrnet/w48_256x192_adam_lr1e-3.yaml b/experiments/coco/hrnet/w48_256x192_adam_lr1e-3.yaml new file mode 100644 index 0000000..45c7011 --- /dev/null +++ b/experiments/coco/hrnet/w48_256x192_adam_lr1e-3.yaml @@ -0,0 +1,127 @@ +AUTO_RESUME: true +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,1,2,3) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: true + DATASET: 'coco' + DATA_FORMAT: jpg + FLIP: true + NUM_JOINTS_HALF_BODY: 8 + PROB_HALF_BODY: 0.3 + ROOT: 'data/coco/' + ROT_FACTOR: 45 + SCALE_FACTOR: 0.35 + TEST_SET: 'val2017' + TRAIN_SET: 'train2017' +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 17 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 192 + - 256 + HEATMAP_SIZE: + - 48 + - 64 + SIGMA: 2 + EXTRA: + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 48 + - 96 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 48 + - 96 + - 192 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 48 + - 96 + - 192 + - 384 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 210 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + COCO_BBOX_FILE: 
'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' + BBOX_THRE: 1.0 + IMAGE_THRE: 0.0 + IN_VIS_THRE: 0.2 + MODEL_FILE: '' + NMS_THRE: 1.0 + OKS_THRE: 0.9 + USE_GT_BBOX: true + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml b/experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml new file mode 100644 index 0000000..2844ff6 --- /dev/null +++ b/experiments/coco/hrnet/w48_384x288_adam_lr1e-3.yaml @@ -0,0 +1,127 @@ +AUTO_RESUME: true +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,1,2,3) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: true + DATASET: 'coco' + DATA_FORMAT: jpg + FLIP: true + NUM_JOINTS_HALF_BODY: 8 + PROB_HALF_BODY: 0.3 + ROOT: 'data/coco/' + ROT_FACTOR: 45 + SCALE_FACTOR: 0.35 + TEST_SET: 'val2017' + TRAIN_SET: 'train2017' +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 17 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 288 + - 384 + HEATMAP_SIZE: + - 72 + - 96 + SIGMA: 3 + EXTRA: + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 48 + - 96 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 48 + - 96 + - 192 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 48 + - 96 + - 192 + - 384 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 24 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 210 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 24 + COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' + BBOX_THRE: 1.0 + IMAGE_THRE: 0.0 + IN_VIS_THRE: 0.2 + MODEL_FILE: '' + NMS_THRE: 1.0 + OKS_THRE: 0.9 + USE_GT_BBOX: true + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/coco/resnet/res101_256x192_d256x3_adam_lr1e-3.yaml b/experiments/coco/resnet/res101_256x192_d256x3_adam_lr1e-3.yaml new file mode 100755 index 0000000..61e8f78 --- /dev/null +++ b/experiments/coco/resnet/res101_256x192_d256x3_adam_lr1e-3.yaml @@ -0,0 +1,83 @@ +AUTO_RESUME: true +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,1,2,3) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: false + DATASET: 'coco' + ROOT: 'data/coco/' + TEST_SET: 'val2017' + TRAIN_SET: 'train2017' + FLIP: true + ROT_FACTOR: 40 + SCALE_FACTOR: 0.3 +MODEL: + NAME: 'pose_resnet' + PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth' + IMAGE_SIZE: + - 192 + - 256 + HEATMAP_SIZE: + - 48 + - 64 + SIGMA: 2 + NUM_JOINTS: 17 + TARGET_TYPE: 'gaussian' + EXTRA: + FINAL_CONV_KERNEL: 1 + DECONV_WITH_BIAS: 
false + NUM_DECONV_LAYERS: 3 + NUM_DECONV_FILTERS: + - 256 + - 256 + - 256 + NUM_DECONV_KERNELS: + - 4 + - 4 + - 4 + NUM_LAYERS: 101 +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 140 + OPTIMIZER: 'adam' + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 90 + - 120 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' + BBOX_THRE: 1.0 + IMAGE_THRE: 0.0 + IN_VIS_THRE: 0.2 + MODEL_FILE: '' + NMS_THRE: 1.0 + OKS_THRE: 0.9 + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true + USE_GT_BBOX: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/coco/resnet/res101_384x288_d256x3_adam_lr1e-3.yaml b/experiments/coco/resnet/res101_384x288_d256x3_adam_lr1e-3.yaml new file mode 100755 index 0000000..b1523c6 --- /dev/null +++ b/experiments/coco/resnet/res101_384x288_d256x3_adam_lr1e-3.yaml @@ -0,0 +1,83 @@ +AUTO_RESUME: true +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,1,2,3) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: false + DATASET: 'coco' + ROOT: 'data/coco/' + TEST_SET: 'val2017' + TRAIN_SET: 'train2017' + FLIP: true + ROT_FACTOR: 40 + SCALE_FACTOR: 0.3 +MODEL: + NAME: 'pose_resnet' + PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth' + IMAGE_SIZE: + - 288 + - 384 + HEATMAP_SIZE: + - 72 + - 96 + SIGMA: 3 + NUM_JOINTS: 17 + TARGET_TYPE: 'gaussian' + EXTRA: + FINAL_CONV_KERNEL: 1 + DECONV_WITH_BIAS: false + NUM_DECONV_LAYERS: 3 + NUM_DECONV_FILTERS: + - 256 + - 256 + - 256 + NUM_DECONV_KERNELS: + - 4 + - 4 + - 4 + NUM_LAYERS: 101 +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 140 + OPTIMIZER: 'adam' + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 90 + - 120 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' + BBOX_THRE: 1.0 + IMAGE_THRE: 0.0 + IN_VIS_THRE: 0.2 + MODEL_FILE: '' + NMS_THRE: 1.0 + OKS_THRE: 0.9 + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true + USE_GT_BBOX: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/coco/resnet/res152_256x192_d256x3_adam_lr1e-3.yaml b/experiments/coco/resnet/res152_256x192_d256x3_adam_lr1e-3.yaml new file mode 100755 index 0000000..580c09f --- /dev/null +++ b/experiments/coco/resnet/res152_256x192_d256x3_adam_lr1e-3.yaml @@ -0,0 +1,83 @@ +AUTO_RESUME: true +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,1,2,3) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: false + DATASET: 'coco' + ROOT: 'data/coco/' + TEST_SET: 'val2017' + TRAIN_SET: 'train2017' + FLIP: true + ROT_FACTOR: 40 + SCALE_FACTOR: 0.3 +MODEL: + NAME: 'pose_resnet' + PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth' + IMAGE_SIZE: + - 192 + - 256 + HEATMAP_SIZE: + - 48 + - 64 + SIGMA: 2 + NUM_JOINTS: 17 + TARGET_TYPE: 'gaussian' + EXTRA: + FINAL_CONV_KERNEL: 1 + DECONV_WITH_BIAS: false + NUM_DECONV_LAYERS: 3 + NUM_DECONV_FILTERS: + - 256 + - 256 + 
- 256 + NUM_DECONV_KERNELS: + - 4 + - 4 + - 4 + NUM_LAYERS: 152 +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 140 + OPTIMIZER: 'adam' + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 90 + - 120 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' + BBOX_THRE: 1.0 + IMAGE_THRE: 0.0 + IN_VIS_THRE: 0.2 + MODEL_FILE: '' + NMS_THRE: 1.0 + OKS_THRE: 0.9 + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true + USE_GT_BBOX: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/coco/resnet/res152_384x288_d256x3_adam_lr1e-3.yaml b/experiments/coco/resnet/res152_384x288_d256x3_adam_lr1e-3.yaml new file mode 100755 index 0000000..156c576 --- /dev/null +++ b/experiments/coco/resnet/res152_384x288_d256x3_adam_lr1e-3.yaml @@ -0,0 +1,83 @@ +AUTO_RESUME: true +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,1,2,3) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: false + DATASET: 'coco' + ROOT: 'data/coco/' + TEST_SET: 'val2017' + TRAIN_SET: 'train2017' + FLIP: true + ROT_FACTOR: 40 + SCALE_FACTOR: 0.3 +MODEL: + NAME: 'pose_resnet' + PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth' + IMAGE_SIZE: + - 288 + - 384 + HEATMAP_SIZE: + - 72 + - 96 + SIGMA: 3 + NUM_JOINTS: 17 + TARGET_TYPE: 'gaussian' + EXTRA: + FINAL_CONV_KERNEL: 1 + DECONV_WITH_BIAS: false + NUM_DECONV_LAYERS: 3 + NUM_DECONV_FILTERS: + - 256 + - 256 + - 256 + NUM_DECONV_KERNELS: + - 4 + - 4 + - 4 + NUM_LAYERS: 152 +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 140 + OPTIMIZER: 'adam' + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 90 + - 120 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' + BBOX_THRE: 1.0 + IMAGE_THRE: 0.0 + IN_VIS_THRE: 0.2 + MODEL_FILE: '' + NMS_THRE: 1.0 + OKS_THRE: 0.9 + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true + USE_GT_BBOX: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/coco/resnet/res50_256x192_d256x3_adam_lr1e-3.yaml b/experiments/coco/resnet/res50_256x192_d256x3_adam_lr1e-3.yaml new file mode 100755 index 0000000..1c00e86 --- /dev/null +++ b/experiments/coco/resnet/res50_256x192_d256x3_adam_lr1e-3.yaml @@ -0,0 +1,83 @@ +AUTO_RESUME: true +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,1,2,3) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: false + DATASET: 'coco' + ROOT: 'data/coco/' + TEST_SET: 'val2017' + TRAIN_SET: 'train2017' + FLIP: true + ROT_FACTOR: 40 + SCALE_FACTOR: 0.3 +MODEL: + NAME: 'pose_resnet' + PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth' + IMAGE_SIZE: + - 192 + - 256 + HEATMAP_SIZE: + - 48 + - 64 + SIGMA: 2 + NUM_JOINTS: 17 + TARGET_TYPE: 'gaussian' + EXTRA: + FINAL_CONV_KERNEL: 1 + DECONV_WITH_BIAS: false + NUM_DECONV_LAYERS: 3 + NUM_DECONV_FILTERS: + - 256 + - 256 + - 256 + NUM_DECONV_KERNELS: + - 4 + - 4 + - 4 + NUM_LAYERS: 50 +LOSS: + 
USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 140 + OPTIMIZER: 'adam' + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 90 + - 120 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' + BBOX_THRE: 1.0 + IMAGE_THRE: 0.0 + IN_VIS_THRE: 0.2 + MODEL_FILE: '' + NMS_THRE: 1.0 + OKS_THRE: 0.9 + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true + USE_GT_BBOX: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/coco/resnet/res50_384x288_d256x3_adam_lr1e-3.yaml b/experiments/coco/resnet/res50_384x288_d256x3_adam_lr1e-3.yaml new file mode 100755 index 0000000..caf7726 --- /dev/null +++ b/experiments/coco/resnet/res50_384x288_d256x3_adam_lr1e-3.yaml @@ -0,0 +1,83 @@ +AUTO_RESUME: true +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,1,2,3) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: false + DATASET: 'coco' + ROOT: 'data/coco/' + TEST_SET: 'val2017' + TRAIN_SET: 'train2017' + FLIP: true + ROT_FACTOR: 40 + SCALE_FACTOR: 0.3 +MODEL: + NAME: 'pose_resnet' + PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth' + IMAGE_SIZE: + - 288 + - 384 + HEATMAP_SIZE: + - 72 + - 96 + SIGMA: 3 + NUM_JOINTS: 17 + TARGET_TYPE: 'gaussian' + EXTRA: + FINAL_CONV_KERNEL: 1 + DECONV_WITH_BIAS: false + NUM_DECONV_LAYERS: 3 + NUM_DECONV_FILTERS: + - 256 + - 256 + - 256 + NUM_DECONV_KERNELS: + - 4 + - 4 + - 4 + NUM_LAYERS: 50 +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 140 + OPTIMIZER: 'adam' + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 90 + - 120 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' + BBOX_THRE: 1.0 + IMAGE_THRE: 0.0 + IN_VIS_THRE: 0.2 + MODEL_FILE: '' + NMS_THRE: 1.0 + OKS_THRE: 0.9 + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true + USE_GT_BBOX: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/corner/corner_2020-06-30_01.yaml b/experiments/corner/corner_2020-06-30_01.yaml new file mode 100644 index 0000000..fa37992 --- /dev/null +++ b/experiments/corner/corner_2020-06-30_01.yaml @@ -0,0 +1,143 @@ +# settings in this file are a modified version of +# experiments/fecalboli/fecalboli_2020-06-19_02.yaml +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-corner' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: simplepoint + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/corner/corner-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.4, 0.5, 0.6, 1.0) + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 1 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + 
- 256 + - 256 + SIGMA: 6 + EXTRA: + CONV_PADDING_MODE: 'reflect' + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 1 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: MSE + POSITIVE_LABEL_WEIGHT: 500 + POSE_HEATMAP_WEIGHT: 0.08 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/fecalboli/fecalboli_2020-05-0-08.yaml b/experiments/fecalboli/fecalboli_2020-05-0-08.yaml new file mode 100644 index 0000000..d4a717a --- /dev/null +++ b/experiments/fecalboli/fecalboli_2020-05-0-08.yaml @@ -0,0 +1,148 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-fecal-boli' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: fecalboli + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/fecal-boli/fecal-boli-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.25, 0.5, 0.75) + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 1 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 1 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + 
NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 500 + POSE_HEATMAP_WEIGHT: 0.08 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/fecalboli/fecalboli_2020-06-19_01.yaml b/experiments/fecalboli/fecalboli_2020-06-19_01.yaml new file mode 100644 index 0000000..bba59d7 --- /dev/null +++ b/experiments/fecalboli/fecalboli_2020-06-19_01.yaml @@ -0,0 +1,149 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-fecal-boli' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: fecalboli + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/fecal-boli/fecal-boli-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.2, 0.3, 0.4, 0.5, 0.6) + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 1 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + CONV_PADDING_MODE: 'reflect' + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 1 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 500 + POSE_HEATMAP_WEIGHT: 0.08 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/fecalboli/fecalboli_2020-06-19_02.yaml b/experiments/fecalboli/fecalboli_2020-06-19_02.yaml new 
file mode 100644 index 0000000..c406585 --- /dev/null +++ b/experiments/fecalboli/fecalboli_2020-06-19_02.yaml @@ -0,0 +1,149 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-fecal-boli' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: fecalboli + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/fecal-boli/fecal-boli-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.2, 0.3, 0.4, 0.5, 0.6) + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 1 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + # TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + CONV_PADDING_MODE: 'reflect' + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 1 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: MSE + POSITIVE_LABEL_WEIGHT: 500 + POSE_HEATMAP_WEIGHT: 0.08 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/fecalboli/fecalboli_2020-06-19_03.yaml b/experiments/fecalboli/fecalboli_2020-06-19_03.yaml new file mode 100644 index 0000000..387b987 --- /dev/null +++ b/experiments/fecalboli/fecalboli_2020-06-19_03.yaml @@ -0,0 +1,149 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-fecal-boli' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: fecalboli + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/fecal-boli/fecal-boli-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.2, 0.3, 0.4, 0.5, 0.6) + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 1 + # PRETRAINED: 
'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + CONV_PADDING_MODE: 'reflect' + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 1 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + # FROZEN_LAYERS: + # - 'conv1' + # - 'bn1' + # - 'conv2' + # - 'bn2' + # - 'layer1' + # - 'transition1' + # - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 500 + POSE_HEATMAP_WEIGHT: 0.08 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/fecalboli/fecalboli_2020-06-19_04.yaml b/experiments/fecalboli/fecalboli_2020-06-19_04.yaml new file mode 100644 index 0000000..a3a7491 --- /dev/null +++ b/experiments/fecalboli/fecalboli_2020-06-19_04.yaml @@ -0,0 +1,149 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-fecal-boli' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: fecalboli + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/fecal-boli/fecal-boli-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.2, 0.3, 0.4, 0.5, 0.6) + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 1 + # PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + # TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + CONV_PADDING_MODE: 'reflect' + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 1 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + # FROZEN_LAYERS: + # - 'conv1' + # - 'bn1' + # - 'conv2' + # - 'bn2' + # - 'layer1' + # - 'transition1' + # - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: 
BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: MSE + POSITIVE_LABEL_WEIGHT: 500 + POSE_HEATMAP_WEIGHT: 0.08 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/fecalboli/fecalboli_2020-06-19_05.yaml b/experiments/fecalboli/fecalboli_2020-06-19_05.yaml new file mode 100644 index 0000000..634a7c9 --- /dev/null +++ b/experiments/fecalboli/fecalboli_2020-06-19_05.yaml @@ -0,0 +1,149 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-fecal-boli' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: fecalboli + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/fecal-boli/fecal-boli-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.2, 0.3, 0.4, 0.5, 0.6) + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 1 + # PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + CONV_PADDING_MODE: 'reflect' + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 1 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + # FROZEN_LAYERS: + # - 'conv1' + # - 'bn1' + # - 'conv2' + # - 'bn2' + # - 'layer1' + # - 'transition1' + # - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 2 + - 2 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 2 + - 2 + - 2 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 2 + - 2 + - 2 + - 2 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 500 + POSE_HEATMAP_WEIGHT: 0.08 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 
32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/fecalboli/fecalboli_2020-06-19_06.yaml b/experiments/fecalboli/fecalboli_2020-06-19_06.yaml new file mode 100644 index 0000000..a7c042d --- /dev/null +++ b/experiments/fecalboli/fecalboli_2020-06-19_06.yaml @@ -0,0 +1,149 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-fecal-boli' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: fecalboli + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/fecal-boli/fecal-boli-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.2, 0.3, 0.4, 0.5, 0.6) + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 1 + # PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + # TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + CONV_PADDING_MODE: 'reflect' + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 1 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + # FROZEN_LAYERS: + # - 'conv1' + # - 'bn1' + # - 'conv2' + # - 'bn2' + # - 'layer1' + # - 'transition1' + # - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 2 + - 2 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 2 + - 2 + - 2 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 2 + - 2 + - 2 + - 2 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: MSE + POSITIVE_LABEL_WEIGHT: 500 + POSE_HEATMAP_WEIGHT: 0.08 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/fecalboli/fecalboli_2020-06-19_07.yaml b/experiments/fecalboli/fecalboli_2020-06-19_07.yaml new file mode 100644 index 0000000..219c171 --- /dev/null +++ b/experiments/fecalboli/fecalboli_2020-06-19_07.yaml @@ -0,0 +1,149 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-fecal-boli' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: fecalboli + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + 
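+ # Reviewer note: the occlusion/jitter keys below are not in stock HRNet;
+ # going by their names, PROB_RANDOMIZED_OCCLUSION is the per-sample chance of
+ # pasting a synthetic occluder of up to MAX_OCCLUSION_SIZE px at one of the
+ # OCCLUSION_OPACITIES, and JITTER_BRIGHTNESS/CONTRAST/SATURATION set
+ # color-jitter ranges (semantics inferred, not verified against the loader).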
SCALE_FACTOR: 0.3 + TEST_SET: 'data/fecal-boli/fecal-boli-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.2, 0.3, 0.4, 0.5, 0.6) + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 1 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + CONV_PADDING_MODE: 'reflect' + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 1 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 500 + POSE_HEATMAP_WEIGHT: 0.08 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/fecalboli/fecalboli_2020-06-19_08.yaml b/experiments/fecalboli/fecalboli_2020-06-19_08.yaml new file mode 100644 index 0000000..219c171 --- /dev/null +++ b/experiments/fecalboli/fecalboli_2020-06-19_08.yaml @@ -0,0 +1,149 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-fecal-boli' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: fecalboli + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/fecal-boli/fecal-boli-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.2, 0.3, 0.4, 0.5, 0.6) + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 1 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + CONV_PADDING_MODE: 'reflect' + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 1 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 
'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 500 + POSE_HEATMAP_WEIGHT: 0.08 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-1.yaml b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-1.yaml new file mode 100644 index 0000000..c38f725 --- /dev/null +++ b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-1.yaml @@ -0,0 +1,147 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +# GPUS: (0,1,2,3) +GPUS: (0,) +OUTPUT_DIR: 'output-full-mouse-pose' +LOG_DIR: 'log-full-mouse-pose' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/point_tracker_withSeg+HE_Validfix.h5' + ROT_FACTOR: 30 + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + #PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + PRETRAINED: 'models/pytorch/pose_coco/pose_hrnet_w32_384x288.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 8 + EXTRA: + # HEAD_ARCH: 'SIMPLE_CONV' + # HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + HEAD_ARCH: 'CONV_TRANS_UPSCALE_3x3' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 
+ - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 210 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-10.yaml b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-10.yaml new file mode 100644 index 0000000..4299fa5 --- /dev/null +++ b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-10.yaml @@ -0,0 +1,149 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +# GPUS: (0,1,2,3) +GPUS: (0,) +OUTPUT_DIR: 'output-full-mouse-pose' +LOG_DIR: 'log-full-mouse-pose' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/point_tracker_withSeg+HE_Validfix.h5' + ROT_FACTOR: 30 + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + #PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + PRETRAINED: 'models/pytorch/pose_coco/pose_hrnet_w32_384x288.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 8 + EXTRA: + # HEAD_ARCH: 'SIMPLE_CONV' + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + # HEAD_ARCH: 'CONV_TRANS_UPSCALE_3x3' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 210 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-11.yaml b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-11.yaml new file mode 100644 index 0000000..ceea574 --- /dev/null +++ 
b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-11.yaml @@ -0,0 +1,149 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +# GPUS: (0,1,2,3) +GPUS: (0,) +OUTPUT_DIR: 'output-full-mouse-pose' +LOG_DIR: 'log-full-mouse-pose' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/point_tracker_withSeg+HE_Validfix.h5' + ROT_FACTOR: 30 + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + #PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + # PRETRAINED: 'models/pytorch/pose_coco/pose_hrnet_w32_384x288.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 8 + EXTRA: + # HEAD_ARCH: 'SIMPLE_CONV' + # HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + HEAD_ARCH: 'CONV_TRANS_UPSCALE_3x3' + PRETRAINED_LAYERS: [] + # PRETRAINED_LAYERS: + # - 'conv1' + # - 'bn1' + # - 'conv2' + # - 'bn2' + # - 'layer1' + # - 'transition1' + # - 'stage2' + # - 'transition2' + # - 'stage3' + # - 'transition3' + # - 'stage4' + FROZEN_LAYERS: [] + # FROZEN_LAYERS: + # - 'conv1' + # - 'bn1' + # - 'conv2' + # - 'bn2' + # - 'layer1' + # - 'transition1' + # - 'stage2' + # - 'transition2' + # - 'stage3' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 210 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-12.yaml b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-12.yaml new file mode 100644 index 0000000..2e60258 --- /dev/null +++ b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-12.yaml @@ -0,0 +1,149 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +# GPUS: (0,1,2,3) +GPUS: (0,) +OUTPUT_DIR: 'output-full-mouse-pose' +LOG_DIR: 'log-full-mouse-pose' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/point_tracker_withSeg+HE_Validfix.h5' + ROT_FACTOR: 30 + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 
0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + #PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + # PRETRAINED: 'models/pytorch/pose_coco/pose_hrnet_w32_384x288.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 64 + - 64 + SIGMA: 2 + # HEATMAP_SIZE: + # - 256 + # - 256 + # SIGMA: 8 + EXTRA: + HEAD_ARCH: 'SIMPLE_CONV' + # HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + # HEAD_ARCH: 'CONV_TRANS_UPSCALE_3x3' + PRETRAINED_LAYERS: [] + # PRETRAINED_LAYERS: + # - 'conv1' + # - 'bn1' + # - 'conv2' + # - 'bn2' + # - 'layer1' + # - 'transition1' + # - 'stage2' + # - 'transition2' + # - 'stage3' + # - 'transition3' + # - 'stage4' + FROZEN_LAYERS: [] + # FROZEN_LAYERS: + # - 'conv1' + # - 'bn1' + # - 'conv2' + # - 'bn2' + # - 'layer1' + # - 'transition1' + # - 'stage2' + # - 'transition2' + # - 'stage3' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 210 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-13.yaml b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-13.yaml new file mode 100644 index 0000000..a157c1b --- /dev/null +++ b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-13.yaml @@ -0,0 +1,147 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +# GPUS: (0,1,2,3) +GPUS: (0,) +OUTPUT_DIR: 'output-full-mouse-pose' +LOG_DIR: 'log-full-mouse-pose' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/point_tracker_withSeg+HE_Validfix.h5' + ROT_FACTOR: 30 + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + #PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + PRETRAINED: 'models/pytorch/pose_coco/pose_hrnet_w32_384x288.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 8 + EXTRA: + # HEAD_ARCH: 'SIMPLE_CONV' + # HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + HEAD_ARCH: 'CONV_TRANS_UPSCALE_3x3' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' 
+ - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 210 + OPTIMIZER: adam + LR: 0.01 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-14.yaml b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-14.yaml new file mode 100644 index 0000000..aa313e5 --- /dev/null +++ b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-14.yaml @@ -0,0 +1,149 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +# GPUS: (0,1,2,3) +GPUS: (0,) +OUTPUT_DIR: 'output-full-mouse-pose' +LOG_DIR: 'log-full-mouse-pose' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/point_tracker_withSeg+HE_Validfix.h5' + ROT_FACTOR: 30 + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + #PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + PRETRAINED: 'models/pytorch/pose_coco/pose_hrnet_w32_384x288.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 8 + EXTRA: + # HEAD_ARCH: 'SIMPLE_CONV' + # HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + HEAD_ARCH: 'CONV_TRANS_UPSCALE_3x3' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + 
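+ # Reviewer note: assuming the stock HRNet MultiStepLR scheduler, training
+ # below runs to END_EPOCH with LR starting at 0.01 and multiplied by
+ # LR_FACTOR (0.1) at each LR_STEP milestone (epochs 100/150/180/200).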
END_EPOCH: 210 + OPTIMIZER: adam + LR: 0.01 + LR_FACTOR: 0.1 + LR_STEP: + - 100 + - 150 + - 180 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-2.yaml b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-2.yaml new file mode 100644 index 0000000..40824c8 --- /dev/null +++ b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-2.yaml @@ -0,0 +1,147 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +# GPUS: (0,1,2,3) +GPUS: (0,) +OUTPUT_DIR: 'output-full-mouse-pose' +LOG_DIR: 'log-full-mouse-pose' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/point_tracker_withSeg+HE_Validfix.h5' + ROT_FACTOR: 30 + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + #PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + PRETRAINED: 'models/pytorch/pose_coco/pose_hrnet_w32_384x288.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 8 + EXTRA: + # HEAD_ARCH: 'SIMPLE_CONV' + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + # HEAD_ARCH: 'CONV_TRANS_UPSCALE_3x3' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 210 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-3.yaml b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-3.yaml new file mode 100644 index 0000000..08504e5 --- /dev/null +++ b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-3.yaml @@ -0,0 +1,147 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + 
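+ # Reviewer note: these CUDNN switches map to torch.backends.cudnn in the
+ # stock HRNet entry points (assumed unchanged here): BENCHMARK enables the
+ # autotuner for fixed input shapes, DETERMINISTIC: false allows faster
+ # nondeterministic kernels, and ENABLED toggles cuDNN entirely.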
ENABLED: true +DATA_DIR: '' +# GPUS: (0,1,2,3) +GPUS: (0,) +OUTPUT_DIR: 'output-full-mouse-pose' +LOG_DIR: 'log-full-mouse-pose' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/point_tracker_withSeg+HE_Validfix.h5' + ROT_FACTOR: 30 + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + #PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + PRETRAINED: 'models/pytorch/pose_coco/pose_hrnet_w32_384x288.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + # HEAD_ARCH: 'SIMPLE_CONV' + # HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + HEAD_ARCH: 'CONV_TRANS_UPSCALE_3x3' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 210 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-4.yaml b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-4.yaml new file mode 100644 index 0000000..fb30b0e --- /dev/null +++ b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-4.yaml @@ -0,0 +1,147 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +# GPUS: (0,1,2,3) +GPUS: (0,) +OUTPUT_DIR: 'output-full-mouse-pose' +LOG_DIR: 'log-full-mouse-pose' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/point_tracker_withSeg+HE_Validfix.h5' + ROT_FACTOR: 30 + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + #PRETRAINED: 
'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + PRETRAINED: 'models/pytorch/pose_coco/pose_hrnet_w32_384x288.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + # HEAD_ARCH: 'SIMPLE_CONV' + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + # HEAD_ARCH: 'CONV_TRANS_UPSCALE_3x3' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 210 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-5.yaml b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-5.yaml new file mode 100644 index 0000000..e873c4c --- /dev/null +++ b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-5.yaml @@ -0,0 +1,147 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +# GPUS: (0,1,2,3) +GPUS: (0,) +OUTPUT_DIR: 'output-full-mouse-pose' +LOG_DIR: 'log-full-mouse-pose' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/point_tracker_withSeg+HE_Validfix.h5' + ROT_FACTOR: 30 + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + # PRETRAINED: 'models/pytorch/pose_coco/pose_hrnet_w32_384x288.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 8 + EXTRA: + # HEAD_ARCH: 'SIMPLE_CONV' + # HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + HEAD_ARCH: 'CONV_TRANS_UPSCALE_3x3' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: 
+ - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 210 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-6.yaml b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-6.yaml new file mode 100644 index 0000000..dff0a06 --- /dev/null +++ b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-6.yaml @@ -0,0 +1,147 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +# GPUS: (0,1,2,3) +GPUS: (0,) +OUTPUT_DIR: 'output-full-mouse-pose' +LOG_DIR: 'log-full-mouse-pose' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/point_tracker_withSeg+HE_Validfix.h5' + ROT_FACTOR: 30 + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + # PRETRAINED: 'models/pytorch/pose_coco/pose_hrnet_w32_384x288.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 8 + EXTRA: + # HEAD_ARCH: 'SIMPLE_CONV' + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + # HEAD_ARCH: 'CONV_TRANS_UPSCALE_3x3' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 210 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + 
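+ # Reviewer note: in upstream HRNet the DEBUG flags below periodically save
+ # ground-truth and predicted batch images plus their heatmaps under the
+ # output directory for visual inspection; presumed to behave the same here.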
SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-7.yaml b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-7.yaml new file mode 100644 index 0000000..7b4b2f9 --- /dev/null +++ b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-7.yaml @@ -0,0 +1,147 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +# GPUS: (0,1,2,3) +GPUS: (0,) +OUTPUT_DIR: 'output-full-mouse-pose' +LOG_DIR: 'log-full-mouse-pose' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/point_tracker_withSeg+HE_Validfix.h5' + ROT_FACTOR: 30 + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + # PRETRAINED: 'models/pytorch/pose_coco/pose_hrnet_w32_384x288.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + # HEAD_ARCH: 'SIMPLE_CONV' + # HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + HEAD_ARCH: 'CONV_TRANS_UPSCALE_3x3' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 210 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-8.yaml b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-8.yaml new file mode 100644 index 0000000..b677f06 --- /dev/null +++ b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-8.yaml @@ -0,0 +1,147 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +# GPUS: (0,1,2,3) +GPUS: (0,) +OUTPUT_DIR: 'output-full-mouse-pose' +LOG_DIR: 'log-full-mouse-pose' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 
'data/hdf5mouse/point_tracker_withSeg+HE_Validfix.h5' + ROT_FACTOR: 30 + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + # PRETRAINED: 'models/pytorch/pose_coco/pose_hrnet_w32_384x288.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + # HEAD_ARCH: 'SIMPLE_CONV' + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + # HEAD_ARCH: 'CONV_TRANS_UPSCALE_3x3' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 210 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-9.yaml b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-9.yaml new file mode 100644 index 0000000..23ebde6 --- /dev/null +++ b/experiments/hdf5mouse/2019-05-23-param-search/mouse-pose-9.yaml @@ -0,0 +1,147 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +# GPUS: (0,1,2,3) +GPUS: (0,) +OUTPUT_DIR: 'output-full-mouse-pose' +LOG_DIR: 'log-full-mouse-pose' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/point_tracker_withSeg+HE_Validfix.h5' + ROT_FACTOR: 30 + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + #PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + PRETRAINED: 'models/pytorch/pose_coco/pose_hrnet_w32_384x288.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 64 + - 64 + SIGMA: 2 + # HEATMAP_SIZE: + # - 256 + # - 256 + # SIGMA: 8 + EXTRA: + HEAD_ARCH: 'SIMPLE_CONV' + # HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + # 
HEAD_ARCH: 'CONV_TRANS_UPSCALE_3x3' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 210 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-06-18-param-search/mp-conf1.yaml b/experiments/hdf5mouse/2019-06-18-param-search/mp-conf1.yaml new file mode 100644 index 0000000..e3a5beb --- /dev/null +++ b/experiments/hdf5mouse/2019-06-18-param-search/mp-conf1.yaml @@ -0,0 +1,142 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/merged_pose_annos_2019-06-18.h5' + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 8 + EXTRA: + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 230 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + 
WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-06-18-param-search/mp-conf10.yaml b/experiments/hdf5mouse/2019-06-18-param-search/mp-conf10.yaml new file mode 100644 index 0000000..c95c8d8 --- /dev/null +++ b/experiments/hdf5mouse/2019-06-18-param-search/mp-conf10.yaml @@ -0,0 +1,140 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/merged_pose_annos_2019-06-18.h5' + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 8 + EXTRA: + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 230 + OPTIMIZER: adam + LR: 0.0005 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-06-18-param-search/mp-conf11.yaml b/experiments/hdf5mouse/2019-06-18-param-search/mp-conf11.yaml new file mode 100644 index 0000000..952b900 --- /dev/null +++ b/experiments/hdf5mouse/2019-06-18-param-search/mp-conf11.yaml @@ -0,0 +1,140 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/merged_pose_annos_2019-06-18.h5' + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: 
training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 8 + EXTRA: + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 230 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-06-18-param-search/mp-conf2.yaml b/experiments/hdf5mouse/2019-06-18-param-search/mp-conf2.yaml new file mode 100644 index 0000000..eab0228 --- /dev/null +++ b/experiments/hdf5mouse/2019-06-18-param-search/mp-conf2.yaml @@ -0,0 +1,142 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/merged_pose_annos_2019-06-18.h5' + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + 
NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 230 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-06-18-param-search/mp-conf3.yaml b/experiments/hdf5mouse/2019-06-18-param-search/mp-conf3.yaml new file mode 100644 index 0000000..cda4064 --- /dev/null +++ b/experiments/hdf5mouse/2019-06-18-param-search/mp-conf3.yaml @@ -0,0 +1,142 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/merged_pose_annos_2019-06-18.h5' + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 4 + EXTRA: + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 230 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-06-18-param-search/mp-conf4.yaml b/experiments/hdf5mouse/2019-06-18-param-search/mp-conf4.yaml new file mode 100644 index 0000000..043baa3 --- /dev/null +++ 
b/experiments/hdf5mouse/2019-06-18-param-search/mp-conf4.yaml @@ -0,0 +1,142 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/merged_pose_annos_2019-06-18.h5' + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 2 + EXTRA: + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 230 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-06-18-param-search/mp-conf5.yaml b/experiments/hdf5mouse/2019-06-18-param-search/mp-conf5.yaml new file mode 100644 index 0000000..0e2fd96 --- /dev/null +++ b/experiments/hdf5mouse/2019-06-18-param-search/mp-conf5.yaml @@ -0,0 +1,142 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/merged_pose_annos_2019-06-18.h5' + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 128 + - 128 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 128 + - 128 + 
SIGMA: 2 + EXTRA: + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 230 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-06-18-param-search/mp-conf6.yaml b/experiments/hdf5mouse/2019-06-18-param-search/mp-conf6.yaml new file mode 100644 index 0000000..beebda8 --- /dev/null +++ b/experiments/hdf5mouse/2019-06-18-param-search/mp-conf6.yaml @@ -0,0 +1,142 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/merged_pose_annos_2019-06-18.h5' + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 8 + EXTRA: + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 230 + OPTIMIZER: adam + LR: 0.0005 + LR_FACTOR: 0.1 + LR_STEP: 
+ - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-06-18-param-search/mp-conf7.yaml b/experiments/hdf5mouse/2019-06-18-param-search/mp-conf7.yaml new file mode 100644 index 0000000..19d914d --- /dev/null +++ b/experiments/hdf5mouse/2019-06-18-param-search/mp-conf7.yaml @@ -0,0 +1,142 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/merged_pose_annos_2019-06-18.h5' + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 8 + EXTRA: + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 230 + OPTIMIZER: adam + LR: 0.005 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-06-18-param-search/mp-conf8.yaml b/experiments/hdf5mouse/2019-06-18-param-search/mp-conf8.yaml new file mode 100644 index 0000000..71ab78a --- /dev/null +++ b/experiments/hdf5mouse/2019-06-18-param-search/mp-conf8.yaml @@ -0,0 +1,140 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/merged_pose_annos_2019-06-18.h5' + SCALE_FACTOR: 0.3 + 
TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 8 + EXTRA: + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 230 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-06-18-param-search/mp-conf9.yaml b/experiments/hdf5mouse/2019-06-18-param-search/mp-conf9.yaml new file mode 100644 index 0000000..333795f --- /dev/null +++ b/experiments/hdf5mouse/2019-06-18-param-search/mp-conf9.yaml @@ -0,0 +1,140 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/merged_pose_annos_2019-06-18.h5' + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + 
NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 230 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-06-26-param-search/mp-conf1.yaml b/experiments/hdf5mouse/2019-06-26-param-search/mp-conf1.yaml new file mode 100644 index 0000000..188d9ae --- /dev/null +++ b/experiments/hdf5mouse/2019-06-26-param-search/mp-conf1.yaml @@ -0,0 +1,140 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/merged_pose_annos_2019-06-26.h5' + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 8 + EXTRA: + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 230 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-06-26-param-search/mp-conf2.yaml b/experiments/hdf5mouse/2019-06-26-param-search/mp-conf2.yaml new file mode 100644 index 0000000..3858ab8 --- /dev/null +++ 
b/experiments/hdf5mouse/2019-06-26-param-search/mp-conf2.yaml @@ -0,0 +1,140 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/merged_pose_annos_2019-06-26.h5' + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 8 + EXTRA: + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 600 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 400 + - 500 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-06-26-param-search/mp-conf3.yaml b/experiments/hdf5mouse/2019-06-26-param-search/mp-conf3.yaml new file mode 100644 index 0000000..95d76f1 --- /dev/null +++ b/experiments/hdf5mouse/2019-06-26-param-search/mp-conf3.yaml @@ -0,0 +1,140 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/merged_pose_annos_2019-06-26.h5' + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 8 + EXTRA: + HEAD_ARCH: 
'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + # FROZEN_LAYERS: + # - 'conv1' + # - 'bn1' + # - 'conv2' + # - 'bn2' + # - 'layer1' + # - 'transition1' + # - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 600 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 400 + - 500 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-06-26-param-search/mp-conf4.yaml b/experiments/hdf5mouse/2019-06-26-param-search/mp-conf4.yaml new file mode 100644 index 0000000..8e4cf41 --- /dev/null +++ b/experiments/hdf5mouse/2019-06-26-param-search/mp-conf4.yaml @@ -0,0 +1,140 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/merged_pose_annos_2019-06-26.h5' + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 8 + EXTRA: + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 600 + OPTIMIZER: adam + LR: 0.0005 + LR_FACTOR: 0.1 + LR_STEP: + - 400 + - 500 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + 
NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-06-26-param-search/mp-conf5.yaml b/experiments/hdf5mouse/2019-06-26-param-search/mp-conf5.yaml new file mode 100644 index 0000000..95d76f1 --- /dev/null +++ b/experiments/hdf5mouse/2019-06-26-param-search/mp-conf5.yaml @@ -0,0 +1,140 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/merged_pose_annos_2019-06-26.h5' + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 8 + EXTRA: + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + # FROZEN_LAYERS: + # - 'conv1' + # - 'bn1' + # - 'conv2' + # - 'bn2' + # - 'layer1' + # - 'transition1' + # - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 600 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 400 + - 500 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-06-26-param-search/mp-conf6.yaml b/experiments/hdf5mouse/2019-06-26-param-search/mp-conf6.yaml new file mode 100644 index 0000000..4f3aac7 --- /dev/null +++ b/experiments/hdf5mouse/2019-06-26-param-search/mp-conf6.yaml @@ -0,0 +1,140 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/merged_pose_annos_2019-06-26.h5' + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + 
MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 600 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 400 + - 500 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/2019-06-26-param-search/mp-conf7.yaml b/experiments/hdf5mouse/2019-06-26-param-search/mp-conf7.yaml new file mode 100644 index 0000000..7e38008 --- /dev/null +++ b/experiments/hdf5mouse/2019-06-26-param-search/mp-conf7.yaml @@ -0,0 +1,140 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/merged_pose_annos_2019-06-26.h5' + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + # FROZEN_LAYERS: + # - 'conv1' + # - 'bn1' + # - 'conv2' + # - 'bn2' + # - 'layer1' + # - 'transition1' + # - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 
4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 600 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 400 + - 500 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/hdf5mouse/hrnet/w32_256x256_full_mouse_pose.yaml b/experiments/hdf5mouse/hrnet/w32_256x256_full_mouse_pose.yaml new file mode 100644 index 0000000..6cb4cc8 --- /dev/null +++ b/experiments/hdf5mouse/hrnet/w32_256x256_full_mouse_pose.yaml @@ -0,0 +1,148 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +# GPUS: (0,1,2,3) +GPUS: (0,) +OUTPUT_DIR: 'output-full-mouse-pose' +LOG_DIR: 'log-full-mouse-pose' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/point_tracker_withSeg+HE_Validfix.h5' + ROT_FACTOR: 30 + SCALE_FACTOR: 0.3 + TEST_SET: validation + TRAIN_SET: training + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + #PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + PRETRAINED: 'models/pytorch/pose_coco/pose_hrnet_w32_384x288.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + # HEATMAP_SIZE: + # - 64 + # - 64 + # SIGMA: 2 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 8 + EXTRA: + HEAD_ARCH: 'SIMPLE_CONV' + # HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + # HEAD_ARCH: 'CONV_TRANS_UPSCALE_3x3' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + # END_EPOCH: 210 + END_EPOCH: 3 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git 
a/experiments/hdf5mouse/hrnet/w32_256x256_mouse_pose.yaml b/experiments/hdf5mouse/hrnet/w32_256x256_mouse_pose.yaml new file mode 100644 index 0000000..57a4506 --- /dev/null +++ b/experiments/hdf5mouse/hrnet/w32_256x256_mouse_pose.yaml @@ -0,0 +1,145 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +# GPUS: (0,1,2,3) +GPUS: (0,) +OUTPUT_DIR: 'output-mouse-pose' +LOG_DIR: 'log-mouse-pose' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: false + DATASET: hdf5mousepose + DATA_FORMAT: hdf5 + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: 'data/hdf5mouse/point_tracker_withSeg+HE_Validfix.h5' + ROT_FACTOR: 30 + SCALE_FACTOR: 0.25 + TEST_SET: validation + TRAIN_SET: training + # PROB_RANDOMIZED_OCCLUSION: 0.0 + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + # PROB_RANDOMIZED_CENTER: 0.0 + PROB_RANDOMIZED_CENTER: 0.1 + # JITTER_CENTER: 0.0 + JITTER_CENTER: 0.1 + # JITTER_BRIGHTNESS: 0.0 + # JITTER_CONTRAST: 0.0 + # JITTER_SATURATION: 0.0 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 64 + - 64 + SIGMA: 2 + EXTRA: + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 210 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/mpii/hrnet/w32_256x256_adam_lr1e-3.yaml b/experiments/mpii/hrnet/w32_256x256_adam_lr1e-3.yaml new file mode 100644 index 0000000..894a844 --- /dev/null +++ b/experiments/mpii/hrnet/w32_256x256_adam_lr1e-3.yaml @@ -0,0 +1,120 @@ +AUTO_RESUME: true +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,1,2,3) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: true + DATASET: mpii + DATA_FORMAT: jpg + FLIP: true + NUM_JOINTS_HALF_BODY: 8 + PROB_HALF_BODY: -1.0 + ROOT: 'data/mpii/' + ROT_FACTOR: 30 + SCALE_FACTOR: 0.25 + TEST_SET: valid + TRAIN_SET: train +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 16 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 64 + - 64 + SIGMA: 2 + 
EXTRA: + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 210 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/mpii/hrnet/w48_256x256_adam_lr1e-3.yaml b/experiments/mpii/hrnet/w48_256x256_adam_lr1e-3.yaml new file mode 100644 index 0000000..1f621dc --- /dev/null +++ b/experiments/mpii/hrnet/w48_256x256_adam_lr1e-3.yaml @@ -0,0 +1,120 @@ +AUTO_RESUME: true +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,1,2,3) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: true + DATASET: mpii + DATA_FORMAT: jpg + FLIP: true + NUM_JOINTS_HALF_BODY: 8 + PROB_HALF_BODY: -1.0 + ROOT: 'data/mpii/' + ROT_FACTOR: 30 + SCALE_FACTOR: 0.25 + TEST_SET: valid + TRAIN_SET: train +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 16 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth' + TARGET_TYPE: gaussian + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 64 + - 64 + SIGMA: 2 + EXTRA: + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 48 + - 96 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 48 + - 96 + - 192 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 48 + - 96 + - 192 + - 384 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 210 + OPTIMIZER: adam + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 170 + - 200 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/mpii/resnet/res101_256x256_d256x3_adam_lr1e-3.yaml b/experiments/mpii/resnet/res101_256x256_d256x3_adam_lr1e-3.yaml new file mode 100755 index 0000000..a48291b --- /dev/null +++ b/experiments/mpii/resnet/res101_256x256_d256x3_adam_lr1e-3.yaml @@ -0,0 +1,86 @@ +AUTO_RESUME: true +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + 
ENABLED: true +DATA_DIR: '' +GPUS: (0,1,2,3) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: false + DATASET: mpii + DATA_FORMAT: jpg + FLIP: true + NUM_JOINTS_HALF_BODY: 8 + PROB_HALF_BODY: -1.0 + ROOT: 'data/mpii/' + ROT_FACTOR: 30 + SCALE_FACTOR: 0.25 + TEST_SET: valid + TRAIN_SET: train +MODEL: + NAME: 'pose_resnet' + PRETRAINED: 'models/pytorch/imagenet/resnet101-5d3b4d8f.pth' + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 64 + - 64 + SIGMA: 2 + NUM_JOINTS: 16 + TARGET_TYPE: 'gaussian' + EXTRA: + FINAL_CONV_KERNEL: 1 + DECONV_WITH_BIAS: false + NUM_DECONV_LAYERS: 3 + NUM_DECONV_FILTERS: + - 256 + - 256 + - 256 + NUM_DECONV_KERNELS: + - 4 + - 4 + - 4 + NUM_LAYERS: 101 +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 140 + OPTIMIZER: 'adam' + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 90 + - 120 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' + BBOX_THRE: 1.0 + IMAGE_THRE: 0.0 + IN_VIS_THRE: 0.2 + MODEL_FILE: '' + NMS_THRE: 1.0 + OKS_THRE: 0.9 + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true + USE_GT_BBOX: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/mpii/resnet/res152_256x256_d256x3_adam_lr1e-3.yaml b/experiments/mpii/resnet/res152_256x256_d256x3_adam_lr1e-3.yaml new file mode 100755 index 0000000..6002ac3 --- /dev/null +++ b/experiments/mpii/resnet/res152_256x256_d256x3_adam_lr1e-3.yaml @@ -0,0 +1,86 @@ +AUTO_RESUME: true +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,1,2,3) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: false + DATASET: mpii + DATA_FORMAT: jpg + FLIP: true + NUM_JOINTS_HALF_BODY: 8 + PROB_HALF_BODY: -1.0 + ROOT: 'data/mpii/' + ROT_FACTOR: 30 + SCALE_FACTOR: 0.25 + TEST_SET: valid + TRAIN_SET: train +MODEL: + NAME: 'pose_resnet' + PRETRAINED: 'models/pytorch/imagenet/resnet152-b121ed2d.pth' + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 64 + - 64 + SIGMA: 2 + NUM_JOINTS: 16 + TARGET_TYPE: 'gaussian' + EXTRA: + FINAL_CONV_KERNEL: 1 + DECONV_WITH_BIAS: false + NUM_DECONV_LAYERS: 3 + NUM_DECONV_FILTERS: + - 256 + - 256 + - 256 + NUM_DECONV_KERNELS: + - 4 + - 4 + - 4 + NUM_LAYERS: 152 +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 140 + OPTIMIZER: 'adam' + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 90 + - 120 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' + BBOX_THRE: 1.0 + IMAGE_THRE: 0.0 + IN_VIS_THRE: 0.2 + MODEL_FILE: '' + NMS_THRE: 1.0 + OKS_THRE: 0.9 + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true + USE_GT_BBOX: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/mpii/resnet/res50_256x256_d256x3_adam_lr1e-3.yaml b/experiments/mpii/resnet/res50_256x256_d256x3_adam_lr1e-3.yaml new file mode 100755 index 0000000..33db6fb --- /dev/null +++ b/experiments/mpii/resnet/res50_256x256_d256x3_adam_lr1e-3.yaml @@ -0,0 +1,86 @@ +AUTO_RESUME: true +CUDNN: 
+ BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,1,2,3) +OUTPUT_DIR: 'output' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 100 + +DATASET: + COLOR_RGB: false + DATASET: mpii + DATA_FORMAT: jpg + FLIP: true + NUM_JOINTS_HALF_BODY: 8 + PROB_HALF_BODY: -1.0 + ROOT: 'data/mpii/' + ROT_FACTOR: 30 + SCALE_FACTOR: 0.25 + TEST_SET: valid + TRAIN_SET: train +MODEL: + NAME: 'pose_resnet' + PRETRAINED: 'models/pytorch/imagenet/resnet50-19c8e357.pth' + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 64 + - 64 + SIGMA: 2 + NUM_JOINTS: 16 + TARGET_TYPE: 'gaussian' + EXTRA: + FINAL_CONV_KERNEL: 1 + DECONV_WITH_BIAS: false + NUM_DECONV_LAYERS: 3 + NUM_DECONV_FILTERS: + - 256 + - 256 + - 256 + NUM_DECONV_KERNELS: + - 4 + - 4 + - 4 + NUM_LAYERS: 50 +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 140 + OPTIMIZER: 'adam' + LR: 0.001 + LR_FACTOR: 0.1 + LR_STEP: + - 90 + - 120 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json' + BBOX_THRE: 1.0 + IMAGE_THRE: 0.0 + IN_VIS_THRE: 0.2 + MODEL_FILE: '' + NMS_THRE: 1.0 + OKS_THRE: 0.9 + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true + USE_GT_BBOX: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse-1.yaml b/experiments/multimouse/multimouse-1.yaml new file mode 100644 index 0000000..27ddf02 --- /dev/null +++ b/experiments/multimouse/multimouse-1.yaml @@ -0,0 +1,145 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + TARGET_TYPE: exp_decay + IMAGE_SIZE: + - 384 + - 384 + HEATMAP_SIZE: + - 384 + - 384 + # IMAGE_SIZE: + # - 256 + # - 256 + # HEATMAP_SIZE: + # - 256 + # - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + 
BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse-2.yaml b/experiments/multimouse/multimouse-2.yaml new file mode 100644 index 0000000..eda2331 --- /dev/null +++ b/experiments/multimouse/multimouse-2.yaml @@ -0,0 +1,145 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + #TARGET_TYPE: exp_decay + IMAGE_SIZE: + - 384 + - 384 + HEATMAP_SIZE: + - 384 + - 384 + # IMAGE_SIZE: + # - 256 + # - 256 + # HEATMAP_SIZE: + # - 256 + # - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse-3.yaml b/experiments/multimouse/multimouse-3.yaml new file mode 100644 index 0000000..208a308 --- /dev/null +++ b/experiments/multimouse/multimouse-3.yaml @@ -0,0 +1,145 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 
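The multimouse DATASET blocks repeated through this sweep add augmentation keys that the MPII configs above do not have: PROB_RANDOMIZED_OCCLUSION, MAX_OCCLUSION_SIZE, and OCCLUSION_OPACITIES. One plausible reading of those three keys, sketched under that assumption (the exact patch shape, fill value, and clipping in the repo may differ), is that a randomly sized patch is alpha-blended over the frame with probability 0.75 to imitate partial occlusion of a mouse:

    import random
    import numpy as np

    def random_occlusion(img, prob=0.75, max_size=150, opacities=(0.5, 0.75, 1.0)):
        """Blend a randomly placed gray rectangle into a (H, W) frame (assumed semantics)."""
        if random.random() >= prob:            # PROB_RANDOMIZED_OCCLUSION
            return img
        h, w = img.shape[:2]
        oh = random.randint(1, max_size)       # MAX_OCCLUSION_SIZE
        ow = random.randint(1, max_size)
        y, x = random.randint(0, h - 1), random.randint(0, w - 1)
        alpha = random.choice(opacities)       # OCCLUSION_OPACITIES
        patch = img[y:y + oh, x:x + ow].astype(np.float32)
        img[y:y + oh, x:x + ow] = ((1.0 - alpha) * patch + alpha * 128.0).astype(img.dtype)
        return img

The neighboring JITTER_BRIGHTNESS/CONTRAST/SATURATION and PROB_RANDOMIZED_CENTER keys suggest analogous photometric and cropping jitter configured the same way.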
+ PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + TARGET_TYPE: exp_decay + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse-4.yaml b/experiments/multimouse/multimouse-4.yaml new file mode 100644 index 0000000..3b1cfb8 --- /dev/null +++ b/experiments/multimouse/multimouse-4.yaml @@ -0,0 +1,145 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 
'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse-5.yaml b/experiments/multimouse/multimouse-5.yaml new file mode 100644 index 0000000..9caa5db --- /dev/null +++ b/experiments/multimouse/multimouse-5.yaml @@ -0,0 +1,145 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + TARGET_TYPE: exp_decay + IMAGE_SIZE: + - 384 + - 384 + HEATMAP_SIZE: + - 384 + - 384 + # IMAGE_SIZE: + # - 256 + # - 256 + # HEATMAP_SIZE: + # - 256 + # - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0005 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + 
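Each multimouse config initializes the whole backbone from the ImageNet HRNet-W32 checkpoint (PRETRAINED_LAYERS) but lists the stem through stage2 under FROZEN_LAYERS, so roughly the first half of the network stays fixed during fine-tuning. A minimal sketch of freezing by submodule name; the function name and the BatchNorm handling are assumptions, not confirmed repo behavior:

    import torch.nn as nn

    FROZEN = ['conv1', 'bn1', 'conv2', 'bn2', 'layer1', 'transition1', 'stage2']

    def freeze_layers(model: nn.Module, names=FROZEN):
        """Disable gradients for the named top-level submodules."""
        for name, child in model.named_children():
            if name in names:
                child.requires_grad_(False)
                child.eval()  # assumption: BN running stats are frozen too; if so,
                              # this must be re-applied after each model.train() call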
SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse-6.yaml b/experiments/multimouse/multimouse-6.yaml new file mode 100644 index 0000000..856a773 --- /dev/null +++ b/experiments/multimouse/multimouse-6.yaml @@ -0,0 +1,145 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + TARGET_TYPE: exp_decay + IMAGE_SIZE: + - 384 + - 384 + HEATMAP_SIZE: + - 384 + - 384 + # IMAGE_SIZE: + # - 256 + # - 256 + # HEATMAP_SIZE: + # - 256 + # - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.00005 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2019-11-19_1.yaml b/experiments/multimouse/multimouse_2019-11-19_1.yaml new file mode 100644 index 0000000..d76d68b --- /dev/null +++ b/experiments/multimouse/multimouse_2019-11-19_1.yaml @@ -0,0 +1,146 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 
'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2019-11-19_10.yaml b/experiments/multimouse/multimouse_2019-11-19_10.yaml new file mode 100644 index 0000000..a1d4145 --- /dev/null +++ b/experiments/multimouse/multimouse_2019-11-19_10.yaml @@ -0,0 +1,146 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + # TARGET_TYPE: gaussian + TARGET_TYPE: exp_decay + IMAGE_SIZE: + - 384 + - 384 + HEATMAP_SIZE: + - 384 + - 384 + # IMAGE_SIZE: + # - 256 + # - 256 + # HEATMAP_SIZE: + # - 256 + # - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.075 + USE_NEIGHBORING_FRAMES: true + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + 
- 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2019-11-19_2.yaml b/experiments/multimouse/multimouse_2019-11-19_2.yaml new file mode 100644 index 0000000..402a04d --- /dev/null +++ b/experiments/multimouse/multimouse_2019-11-19_2.yaml @@ -0,0 +1,146 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: true + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2019-11-19_3.yaml b/experiments/multimouse/multimouse_2019-11-19_3.yaml new file mode 100644 index 0000000..2f2850e --- /dev/null +++ 
b/experiments/multimouse/multimouse_2019-11-19_3.yaml @@ -0,0 +1,146 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + # TARGET_TYPE: gaussian + TARGET_TYPE: exp_decay + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2019-11-19_4.yaml b/experiments/multimouse/multimouse_2019-11-19_4.yaml new file mode 100644 index 0000000..d1ff1c2 --- /dev/null +++ b/experiments/multimouse/multimouse_2019-11-19_4.yaml @@ -0,0 +1,146 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + # TARGET_TYPE: gaussian + TARGET_TYPE: exp_decay + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 
+ # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: true + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2019-11-19_5.yaml b/experiments/multimouse/multimouse_2019-11-19_5.yaml new file mode 100644 index 0000000..562a11c --- /dev/null +++ b/experiments/multimouse/multimouse_2019-11-19_5.yaml @@ -0,0 +1,146 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + # TARGET_TYPE: gaussian + TARGET_TYPE: exp_decay + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.075 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 
256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2019-11-19_6.yaml b/experiments/multimouse/multimouse_2019-11-19_6.yaml new file mode 100644 index 0000000..dc861dc --- /dev/null +++ b/experiments/multimouse/multimouse_2019-11-19_6.yaml @@ -0,0 +1,146 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.075 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2019-11-19_7.yaml b/experiments/multimouse/multimouse_2019-11-19_7.yaml new file mode 100644 index 0000000..57616e0 --- /dev/null +++ b/experiments/multimouse/multimouse_2019-11-19_7.yaml @@ -0,0 +1,146 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 
'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + IMAGE_SIZE: + - 384 + - 384 + HEATMAP_SIZE: + - 384 + - 384 + # IMAGE_SIZE: + # - 256 + # - 256 + # HEATMAP_SIZE: + # - 256 + # - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2019-11-19_8.yaml b/experiments/multimouse/multimouse_2019-11-19_8.yaml new file mode 100644 index 0000000..b3688de --- /dev/null +++ b/experiments/multimouse/multimouse_2019-11-19_8.yaml @@ -0,0 +1,146 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + IMAGE_SIZE: + - 384 + - 384 + HEATMAP_SIZE: + - 384 + - 384 + # IMAGE_SIZE: + # - 256 + # - 256 + # HEATMAP_SIZE: + # - 256 + # - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: true + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 
'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2019-11-19_9.yaml b/experiments/multimouse/multimouse_2019-11-19_9.yaml new file mode 100644 index 0000000..2035c57 --- /dev/null +++ b/experiments/multimouse/multimouse_2019-11-19_9.yaml @@ -0,0 +1,146 @@ +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + # TARGET_TYPE: gaussian + TARGET_TYPE: exp_decay + IMAGE_SIZE: + - 384 + - 384 + HEATMAP_SIZE: + - 384 + - 384 + # IMAGE_SIZE: + # - 256 + # - 256 + # HEATMAP_SIZE: + # - 256 + # - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.075 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + 
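The TRAIN block shared by all of these multimouse files maps one-to-one onto stock PyTorch objects: Adam at LR 1e-4 with WD 0.0, decayed by LR_FACTOR 0.1 at epochs 1000 and 1750 over a 2000-epoch run (MOMENTUM, NESTEROV, GAMMA1, and GAMMA2 would only come into play under a different OPTIMIZER choice). A minimal sketch of that mapping; the one-layer model and the empty epoch body are placeholders, not the repo's pose_hrnet or training loop:

    import torch.nn as nn
    import torch.optim as optim

    cfg_train = dict(OPTIMIZER='adam', LR=0.0001, WD=0.0, LR_STEP=[1000, 1750],
                     LR_FACTOR=0.1, BEGIN_EPOCH=0, END_EPOCH=2000)  # values from these files

    model = nn.Conv2d(1, 24, 3)  # placeholder for pose_hrnet
    optimizer = optim.Adam(model.parameters(), lr=cfg_train['LR'],
                           weight_decay=cfg_train['WD'])
    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=cfg_train['LR_STEP'], gamma=cfg_train['LR_FACTOR'])

    for epoch in range(cfg_train['BEGIN_EPOCH'], cfg_train['END_EPOCH']):
        # one pass over the training set would run here
        scheduler.step()  # 1e-4 until epoch 1000, 1e-5 until 1750, then 1e-6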
LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2019-12-19_1.yaml b/experiments/multimouse/multimouse_2019-12-19_1.yaml new file mode 100644 index 0000000..08eb77e --- /dev/null +++ b/experiments/multimouse/multimouse_2019-12-19_1.yaml @@ -0,0 +1,147 @@ +# This is essentially the same as multimouse-4.yaml +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2019-12-19_2.yaml b/experiments/multimouse/multimouse_2019-12-19_2.yaml new file mode 100644 index 0000000..77f9380 --- /dev/null +++ b/experiments/multimouse/multimouse_2019-12-19_2.yaml @@ -0,0 +1,147 @@ +# same as multimouse-2019-12-19_1.yaml except that here we use neighboring frames +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + 
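Experiment files like the ones in this patch are normally consumed by a yacs-style config loader, as in the upstream HRNet codebase this repository appears to derive from; under that assumption, loading one file looks roughly like the sketch below (load_experiment is illustrative, not a function from the repo, and the real code merges over a full set of defaults rather than an empty node):

    from yacs.config import CfgNode as CN

    def load_experiment(path):
        """Read one experiment YAML into a config node (abbreviated sketch)."""
        cfg = CN(new_allowed=True)  # the repo would start from its full default config
        cfg.merge_from_file(path)
        cfg.freeze()
        return cfg

    cfg = load_experiment('experiments/multimouse/multimouse_2019-12-19_1.yaml')
    print(cfg.TRAIN.LR, cfg.MODEL.EXTRA.HEAD_ARCH)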
+DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: true + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2019-12-19_3.yaml b/experiments/multimouse/multimouse_2019-12-19_3.yaml new file mode 100644 index 0000000..0d13ead --- /dev/null +++ b/experiments/multimouse/multimouse_2019-12-19_3.yaml @@ -0,0 +1,148 @@ +# This is the same as multimouse-2019-12-19_1.yaml +# except that we use a larger image size +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + IMAGE_SIZE: + - 384 + - 384 + HEATMAP_SIZE: + - 384 + - 384 + # IMAGE_SIZE: + # - 256 + # - 256 + # HEATMAP_SIZE: + # - 256 + # - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + 
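Around this point the sweeps toggle MODEL.TARGET_TYPE between gaussian and exp_decay, with SIGMA: 6 governing the first and MODEL.EXTRA.EXP_DECAY_LAMBDA (0.1 here, 0.075 in some of the 2019-11-19 files) governing the second. The gaussian renderer below is the standard heatmap formulation; the exp_decay form is an assumption read off the name and the lambda key, with the repo's exact normalization and windowing unverified:

    import numpy as np

    def gaussian_target(size, joint_xy, sigma=6.0):
        """TARGET_TYPE gaussian: unit-peak 2D bump, spread set by SIGMA."""
        xs = np.arange(size, dtype=np.float32)
        ys = xs[:, None]
        x0, y0 = joint_xy
        return np.exp(-((xs - x0) ** 2 + (ys - y0) ** 2) / (2.0 * sigma ** 2))

    def exp_decay_target(size, joint_xy, lam=0.1):
        """TARGET_TYPE exp_decay (assumed form): peak falls off as exp(-lambda * r)."""
        xs = np.arange(size, dtype=np.float32)
        ys = xs[:, None]
        x0, y0 = joint_xy
        r = np.hypot(xs - x0, ys - y0)
        return np.exp(-lam * r)

    g = gaussian_target(256, (128, 96))   # one 256x256 joint map
    e = exp_decay_target(256, (128, 96))  # heavier tails than g

An exponential profile keeps noticeably more mass in its tails than a Gaussian, which is presumably why it is swept against the default here.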
OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2019-12-19_4.yaml b/experiments/multimouse/multimouse_2019-12-19_4.yaml new file mode 100644 index 0000000..3773a87 --- /dev/null +++ b/experiments/multimouse/multimouse_2019-12-19_4.yaml @@ -0,0 +1,149 @@ +# This is the same as multimouse-2019-12-19_1.yaml +# except that we use a larger image size and neighboring +# frames +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + IMAGE_SIZE: + - 384 + - 384 + HEATMAP_SIZE: + - 384 + - 384 + # IMAGE_SIZE: + # - 256 + # - 256 + # HEATMAP_SIZE: + # - 256 + # - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: true + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: +
USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2019-12-19_5.yaml b/experiments/multimouse/multimouse_2019-12-19_5.yaml new file mode 100644 index 0000000..609e2cd --- /dev/null +++ b/experiments/multimouse/multimouse_2019-12-19_5.yaml @@ -0,0 +1,148 @@ +# This is essentially the same as multimouse-2019-12-19_1.yaml except +# the HEAD_ARCH is CONV_TRANS_UPSCALE_5x5_EXTRA_CONVS +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5_EXTRA_CONVS' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2019-12-19_6.yaml b/experiments/multimouse/multimouse_2019-12-19_6.yaml new file mode 100644 index 0000000..0a64318 --- /dev/null +++ b/experiments/multimouse/multimouse_2019-12-19_6.yaml @@ -0,0 +1,148 @@ +# same as 
multimouse-2019-12-19_1.yaml except that here we use neighboring frames +# and the HEAD_ARCH is CONV_TRANS_UPSCALE_5x5_EXTRA_CONVS +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: true + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5_EXTRA_CONVS' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2019-12-19_7.yaml b/experiments/multimouse/multimouse_2019-12-19_7.yaml new file mode 100644 index 0000000..35ab994 --- /dev/null +++ b/experiments/multimouse/multimouse_2019-12-19_7.yaml @@ -0,0 +1,149 @@ +# This is the same as multimouse-2019-12-19_1.yaml +# except that we use a larger image size and +# the HEAD_ARCH is CONV_TRANS_UPSCALE_5x5_EXTRA_CONVS +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + 
INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + IMAGE_SIZE: + - 384 + - 384 + HEATMAP_SIZE: + - 384 + - 384 + # IMAGE_SIZE: + # - 256 + # - 256 + # HEATMAP_SIZE: + # - 256 + # - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5_EXTRA_CONVS' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2019-12-19_8.yaml b/experiments/multimouse/multimouse_2019-12-19_8.yaml new file mode 100644 index 0000000..b00ac03 --- /dev/null +++ b/experiments/multimouse/multimouse_2019-12-19_8.yaml @@ -0,0 +1,149 @@ +# This is the same as multimouse-2019-12-19_1.yaml +# except that we use a larger image size and neighboring +# frames and the HEAD_ARCH is CONV_TRANS_UPSCALE_5x5_EXTRA_CONVS +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + IMAGE_SIZE: + - 384 + - 384 + HEATMAP_SIZE: + - 384 + - 384 + # IMAGE_SIZE: + # - 256 + # - 256 + # HEATMAP_SIZE: + # - 256 + # - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: true + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5_EXTRA_CONVS' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' +
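
Note: the FROZEN_LAYERS list that closes above is the config-driven way these experiments lock the early HRNet stages while the head and later stages fine-tune. A minimal sketch of how such a list can be applied to a PyTorch model, assuming the model's child submodules carry the listed names (the apply_frozen_layers helper is illustrative, not the repo's actual function):

    import torch.nn as nn

    def apply_frozen_layers(model: nn.Module, frozen_layers) -> None:
        # Disable gradient updates for every parameter under the named children.
        for name, child in model.named_children():
            if name in frozen_layers:
                for param in child.parameters():
                    param.requires_grad = False

    # e.g. apply_frozen_layers(pose_hrnet, ['conv1', 'bn1', 'conv2', 'bn2',
    #                                       'layer1', 'transition1', 'stage2'])
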
FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2019-12-31_1.yaml b/experiments/multimouse/multimouse_2019-12-31_1.yaml new file mode 100644 index 0000000..10287c3 --- /dev/null +++ b/experiments/multimouse/multimouse_2019-12-31_1.yaml @@ -0,0 +1,151 @@ +# first attempt at using balanced BCE +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: BALANCED_BCE + POSE_HEATMAP_WEIGHT: 0.002 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + 
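
Note: the LOSS block of multimouse_2019-12-31_1.yaml above drops USE_TARGET_WEIGHT in favor of a balanced binary cross-entropy on the pose heatmaps, weighted against an associative-embedding term (OUTPUT_CHANNELS_PER_JOINT: 2 plausibly reserves one heatmap channel and one embedding channel per joint). A sketch of one common balanced-BCE formulation, under the assumption that positive and negative pixels are averaged separately so the few keypoint pixels are not swamped by background; the repo's exact definition may differ:

    import torch.nn.functional as F

    def balanced_bce(pred_logits, target):
        # Per-pixel BCE, then average positive and negative pixels separately
        # so each class contributes equally regardless of how rare it is.
        loss = F.binary_cross_entropy_with_logits(
            pred_logits, target, reduction='none')
        pos = target > 0.5
        return 0.5 * (loss[pos].mean() + loss[~pos].mean())

    # total = 0.002 * balanced_bce(pose_logits, pose_target)  # POSE_HEATMAP_WEIGHT
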
MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-15_1.yaml b/experiments/multimouse/multimouse_2020-01-15_1.yaml new file mode 100644 index 0000000..a9e0257 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-15_1.yaml @@ -0,0 +1,152 @@ +# first attempt at using balanced BCE +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 5000 + POSE_HEATMAP_WEIGHT: 5.0 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-17_01.yaml b/experiments/multimouse/multimouse_2020-01-17_01.yaml new file mode 100644 index 0000000..b2cd9bb --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-17_01.yaml @@ -0,0 +1,152 @@ +# first attempt at using balanced BCE +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + 
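
Note: multimouse_2020-01-15_1.yaml above instead uses WEIGHTED_BCE with POSITIVE_LABEL_WEIGHT: 5000, which maps naturally onto PyTorch's pos_weight: with point targets, a 256x256 map has roughly one positive pixel in 65k, so a weight in the thousands approximately rebalances the classes. A hedged sketch (the repo may apply the weight by hand rather than through BCEWithLogitsLoss):

    import torch

    criterion = torch.nn.BCEWithLogitsLoss(pos_weight=torch.tensor(5000.0))
    # pose_term = 5.0 * criterion(pose_logits, pose_target)  # POSE_HEATMAP_WEIGHT
    # total = pose_term + 0.001 * embedding_term             # ASSOC_EMBEDDING_WEIGHT
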
DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: BALANCED_BCE + POSE_HEATMAP_WEIGHT: 0.02 + ASSOC_EMBEDDING_WEIGHT: 0.001 + BALANCED_BCE_FAIRNESS_QUOTIENT: 0.05 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-17_02.yaml b/experiments/multimouse/multimouse_2020-01-17_02.yaml new file mode 100644 index 0000000..25cf84d --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-17_02.yaml @@ -0,0 +1,152 @@ +# first attempt at using balanced BCE +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + 
EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: BALANCED_BCE + POSE_HEATMAP_WEIGHT: 0.01 + ASSOC_EMBEDDING_WEIGHT: 0.001 + BALANCED_BCE_FAIRNESS_QUOTIENT: 0.05 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-17_03.yaml b/experiments/multimouse/multimouse_2020-01-17_03.yaml new file mode 100644 index 0000000..ac8eda8 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-17_03.yaml @@ -0,0 +1,152 @@ +# first attempt at using balanced BCE +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + IMAGE_SIZE: + - 384 + - 384 + HEATMAP_SIZE: + - 384 + - 384 + # IMAGE_SIZE: + # - 256 + # - 256 + # HEATMAP_SIZE: + # - 256 + # - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + 
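
Note: multimouse_2020-01-17_01 and _02 above fix BALANCED_BCE_FAIRNESS_QUOTIENT at 0.05 while halving POSE_HEATMAP_WEIGHT (0.02 vs 0.01). The quotient's semantics were not verified against the loss code; one plausible reading, sketched purely under that assumption, is an interpolation between the fully class-balanced mean and a plain per-pixel mean:

    import torch.nn.functional as F

    def balanced_bce_fair(pred_logits, target, quotient=0.05):
        # Hypothetical: quotient = 0 is fully balanced, quotient = 1 is plain BCE.
        loss = F.binary_cross_entropy_with_logits(
            pred_logits, target, reduction='none')
        pos = target > 0.5
        balanced = 0.5 * (loss[pos].mean() + loss[~pos].mean())
        return (1.0 - quotient) * balanced + quotient * loss.mean()
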
NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: BALANCED_BCE + POSE_HEATMAP_WEIGHT: 0.02 + ASSOC_EMBEDDING_WEIGHT: 0.001 + BALANCED_BCE_FAIRNESS_QUOTIENT: 0.05 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-17_04.yaml b/experiments/multimouse/multimouse_2020-01-17_04.yaml new file mode 100644 index 0000000..71f1749 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-17_04.yaml @@ -0,0 +1,152 @@ +# first attempt at using balanced BCE +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + IMAGE_SIZE: + - 384 + - 384 + HEATMAP_SIZE: + - 384 + - 384 + # IMAGE_SIZE: + # - 256 + # - 256 + # HEATMAP_SIZE: + # - 256 + # - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: BALANCED_BCE + POSE_HEATMAP_WEIGHT: 0.01 + ASSOC_EMBEDDING_WEIGHT: 0.001 + BALANCED_BCE_FAIRNESS_QUOTIENT: 0.05 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + 
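
Note: every TRAIN block in this series runs Adam for 2000 epochs at LR 1e-4 with zero weight decay, stepping down by LR_FACTOR 0.1 at epochs 1000 and 1750. The standard PyTorch equivalent, presumably what the training script builds from these keys (model is the pose network; train_one_epoch is a hypothetical placeholder):

    import torch

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=0.0)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[1000, 1750], gamma=0.1)  # LR_STEP / LR_FACTOR
    for epoch in range(0, 2000):            # BEGIN_EPOCH .. END_EPOCH
        train_one_epoch(model, optimizer)   # hypothetical helper
        scheduler.step()
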
SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-17_05.yaml b/experiments/multimouse/multimouse_2020-01-17_05.yaml new file mode 100644 index 0000000..8a5f3ef --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-17_05.yaml @@ -0,0 +1,152 @@ +# first attempt at using balanced BCE +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: BALANCED_BCE + POSE_HEATMAP_WEIGHT: 0.02 + ASSOC_EMBEDDING_WEIGHT: 0.001 + BALANCED_BCE_FAIRNESS_QUOTIENT: 0.1 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-17_06.yaml b/experiments/multimouse/multimouse_2020-01-17_06.yaml new file mode 100644 index 0000000..dda40ae --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-17_06.yaml @@ -0,0 +1,152 @@ +# first attempt at using balanced BCE +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' 
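
Note: from multimouse_2019-12-31_1.yaml onward, TARGET_TYPE flips from gaussian to point, which pairs with the switch to BCE-style losses: instead of regressing a sigma-6 Gaussian blob, the target presumably marks only the keypoint pixel itself. A sketch of both target generators under that assumed 'point' semantics:

    import numpy as np

    def make_target(h, w, x, y, target_type='gaussian', sigma=6):
        # 'point' semantics assumed: a single hot pixel at the keypoint.
        if target_type == 'point':
            t = np.zeros((h, w), dtype=np.float32)
            t[int(y), int(x)] = 1.0
            return t
        ys, xs = np.mgrid[0:h, 0:w]
        d2 = (xs - x) ** 2 + (ys - y) ** 2
        return np.exp(-d2 / (2.0 * sigma ** 2)).astype(np.float32)
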
+ PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: BALANCED_BCE + POSE_HEATMAP_WEIGHT: 0.02 + ASSOC_EMBEDDING_WEIGHT: 0.001 + BALANCED_BCE_FAIRNESS_QUOTIENT: 0.01 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-17_07.yaml b/experiments/multimouse/multimouse_2020-01-17_07.yaml new file mode 100644 index 0000000..cb198a6 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-17_07.yaml @@ -0,0 +1,152 @@ +# first attempt at using balanced BCE +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 
'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: BALANCED_BCE + POSE_HEATMAP_WEIGHT: 0.01 + ASSOC_EMBEDDING_WEIGHT: 0.001 + BALANCED_BCE_FAIRNESS_QUOTIENT: 0.1 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-17_08.yaml b/experiments/multimouse/multimouse_2020-01-17_08.yaml new file mode 100644 index 0000000..769c8d6 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-17_08.yaml @@ -0,0 +1,152 @@ +# first attempt at using balanced BCE +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + 
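
Note: taken together, the nine balanced-BCE configs of 2020-01-17 (_01 through _09; the last two cells, _08 and _09, follow just below) sweep POSE_HEATMAP_WEIGHT against BALANCED_BCE_FAIRNESS_QUOTIENT, with _03/_04 repeating _01/_02 at 384x384. A hand-reconstructed summary of the grid; verify against the YAML before reusing it:

    # (POSE_HEATMAP_WEIGHT, BALANCED_BCE_FAIRNESS_QUOTIENT) per config file.
    balanced_bce_sweep = {
        '01': (0.02, 0.05), '02': (0.01, 0.05),
        '03': (0.02, 0.05), '04': (0.01, 0.05),  # 384x384 variants of _01/_02
        '05': (0.02, 0.10), '06': (0.02, 0.01),
        '07': (0.01, 0.10), '08': (0.002, 0.10),
        '09': (0.10, 0.01),
    }
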
POSE_LOSS_FUNC: BALANCED_BCE + POSE_HEATMAP_WEIGHT: 0.002 + ASSOC_EMBEDDING_WEIGHT: 0.001 + BALANCED_BCE_FAIRNESS_QUOTIENT: 0.1 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-17_09.yaml b/experiments/multimouse/multimouse_2020-01-17_09.yaml new file mode 100644 index 0000000..cf04673 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-17_09.yaml @@ -0,0 +1,152 @@ +# first attempt at using balanced BCE +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: BALANCED_BCE + POSE_HEATMAP_WEIGHT: 0.1 + ASSOC_EMBEDDING_WEIGHT: 0.001 + BALANCED_BCE_FAIRNESS_QUOTIENT: 0.01 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-17_10.yaml b/experiments/multimouse/multimouse_2020-01-17_10.yaml new 
file mode 100644 index 0000000..b9fee7e --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-17_10.yaml @@ -0,0 +1,152 @@ +# first attempt at using balanced BCE +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 5000 + POSE_HEATMAP_WEIGHT: 0.002 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-17_11.yaml b/experiments/multimouse/multimouse_2020-01-17_11.yaml new file mode 100644 index 0000000..8e986bf --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-17_11.yaml @@ -0,0 +1,152 @@ +# first attempt at using balanced BCE +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + 
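
Note: the DATASET keys here define the augmentation recipe shared by every file in this series: a random occlusion patch up to 150 px at one of three opacities on 75% of samples, plus center, brightness, contrast (just below), and saturation jitter. A sketch of the occlusion step under assumed semantics; the repo's sampling code was not consulted:

    import random
    import numpy as np

    def maybe_occlude(img, prob=0.75, max_size=150, opacities=(0.5, 0.75, 1.0)):
        # Alpha-blend one gray rectangle of random size and position into
        # the image. img: HxWxC float array in [0, 1], modified in place.
        if random.random() > prob:
            return img
        h, w = img.shape[:2]
        size = random.randint(1, max_size)
        y, x = random.randrange(h), random.randrange(w)
        alpha = random.choice(opacities)
        patch = img[y:y + size, x:x + size]
        patch[:] = (1 - alpha) * patch + alpha * 0.5
        return img
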
JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 5000 + POSE_HEATMAP_WEIGHT: 0.02 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-17_12.yaml b/experiments/multimouse/multimouse_2020-01-17_12.yaml new file mode 100644 index 0000000..b85ce21 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-17_12.yaml @@ -0,0 +1,152 @@ +# first attempt at using balanced BCE +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 
'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 2500 + POSE_HEATMAP_WEIGHT: 0.002 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-17_13.yaml b/experiments/multimouse/multimouse_2020-01-17_13.yaml new file mode 100644 index 0000000..51c30f3 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-17_13.yaml @@ -0,0 +1,152 @@ +# first attempt at using balanced BCE +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 2500 + POSE_HEATMAP_WEIGHT: 0.02 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 
2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-17_14.yaml b/experiments/multimouse/multimouse_2020-01-17_14.yaml new file mode 100644 index 0000000..c376968 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-17_14.yaml @@ -0,0 +1,152 @@ +# first attempt at using balanced BCE +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + IMAGE_SIZE: + - 384 + - 384 + HEATMAP_SIZE: + - 384 + - 384 + # IMAGE_SIZE: + # - 256 + # - 256 + # HEATMAP_SIZE: + # - 256 + # - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 5000 + POSE_HEATMAP_WEIGHT: 0.002 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-17_15.yaml b/experiments/multimouse/multimouse_2020-01-17_15.yaml new file mode 100644 index 0000000..3ff9363 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-17_15.yaml @@ -0,0 +1,152 @@ +# first attempt at using balanced BCE +AUTO_RESUME: false +CUDNN: + BENCHMARK: true 
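
Note: the CUDNN block (continued just below) corresponds one-to-one to PyTorch's backend switches; BENCHMARK trades run-to-run determinism for autotuned convolution kernels, which suits the fixed square inputs used throughout these configs:

    import torch

    torch.backends.cudnn.benchmark = True       # BENCHMARK
    torch.backends.cudnn.deterministic = False  # DETERMINISTIC
    torch.backends.cudnn.enabled = True         # ENABLED
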
+ DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + IMAGE_SIZE: + - 384 + - 384 + HEATMAP_SIZE: + - 384 + - 384 + # IMAGE_SIZE: + # - 256 + # - 256 + # HEATMAP_SIZE: + # - 256 + # - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 10000 + POSE_HEATMAP_WEIGHT: 0.002 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-17_16.yaml b/experiments/multimouse/multimouse_2020-01-17_16.yaml new file mode 100644 index 0000000..ddc7590 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-17_16.yaml @@ -0,0 +1,152 @@ +# first attempt at using balanced BCE +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: 
exp_decay + TARGET_TYPE: point + IMAGE_SIZE: + - 384 + - 384 + HEATMAP_SIZE: + - 384 + - 384 + # IMAGE_SIZE: + # - 256 + # - 256 + # HEATMAP_SIZE: + # - 256 + # - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 10000 + POSE_HEATMAP_WEIGHT: 0.02 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-21_01.yaml b/experiments/multimouse/multimouse_2020-01-21_01.yaml new file mode 100644 index 0000000..b7144d7 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-21_01.yaml @@ -0,0 +1,152 @@ +# based off of 2020-01-17_11 +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + 
NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 5000 + POSE_HEATMAP_WEIGHT: 0.02 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-21_02.yaml b/experiments/multimouse/multimouse_2020-01-21_02.yaml new file mode 100644 index 0000000..95e631a --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-21_02.yaml @@ -0,0 +1,152 @@ +# based off of 2020-01-17_11 +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 5000 + POSE_HEATMAP_WEIGHT: 0.015 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + 
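
Note: every TEST block in the series (the one continuing just below included) keeps FLIP_TEST, POST_PROCESS, and SHIFT_HEATMAP on. Flip testing conventionally averages heatmaps from the image and its horizontal mirror, swapping left/right joint channels back; a sketch of that convention only (SHIFT_HEATMAP presumably also shifts the flipped heatmap by a pixel, which is omitted here):

    import torch

    def flip_test(model, images, flip_pairs):
        # Average predictions over the input and its horizontal mirror;
        # flip_pairs lists (left, right) joint channel indices to swap.
        heatmaps = model(images)
        flipped = torch.flip(model(torch.flip(images, dims=[3])), dims=[3])
        for left, right in flip_pairs:
            flipped[:, [left, right]] = flipped[:, [right, left]]
        return 0.5 * (heatmaps + flipped)
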
FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-21_03.yaml b/experiments/multimouse/multimouse_2020-01-21_03.yaml new file mode 100644 index 0000000..a5436f9 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-21_03.yaml @@ -0,0 +1,152 @@ +# based off of 2020-01-17_11 +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 2500 + POSE_HEATMAP_WEIGHT: 0.02 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-21_04.yaml b/experiments/multimouse/multimouse_2020-01-21_04.yaml new file mode 100644 index 0000000..e0cb492 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-21_04.yaml @@ -0,0 +1,152 @@ +# based off of 2020-01-17_11 +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + 
NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 2500 + POSE_HEATMAP_WEIGHT: 0.03 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-21_05.yaml b/experiments/multimouse/multimouse_2020-01-21_05.yaml new file mode 100644 index 0000000..2194151 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-21_05.yaml @@ -0,0 +1,152 @@ +# based off of 2020-01-17_11 +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + 
OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 2500 + POSE_HEATMAP_WEIGHT: 0.04 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-21_06.yaml b/experiments/multimouse/multimouse_2020-01-21_06.yaml new file mode 100644 index 0000000..5a326fe --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-21_06.yaml @@ -0,0 +1,152 @@ +# based off of 2020-01-17_11 +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + 
- 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 1500 + POSE_HEATMAP_WEIGHT: 0.02 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-21_07.yaml b/experiments/multimouse/multimouse_2020-01-21_07.yaml new file mode 100644 index 0000000..e022e21 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-21_07.yaml @@ -0,0 +1,152 @@ +# based off of 2020-01-17_11 +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 1500 + POSE_HEATMAP_WEIGHT: 0.03 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-21_08.yaml 
b/experiments/multimouse/multimouse_2020-01-21_08.yaml new file mode 100644 index 0000000..1a84504 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-21_08.yaml @@ -0,0 +1,152 @@ +# based off of 2020-01-17_11 +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 1500 + POSE_HEATMAP_WEIGHT: 0.04 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-21_09.yaml b/experiments/multimouse/multimouse_2020-01-21_09.yaml new file mode 100644 index 0000000..d6c8e0b --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-21_09.yaml @@ -0,0 +1,152 @@ +# based off of 2020-01-17_11 +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + 
JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 1500 + POSE_HEATMAP_WEIGHT: 0.06 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-22_01.yaml b/experiments/multimouse/multimouse_2020-01-22_01.yaml new file mode 100644 index 0000000..2b64338 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-22_01.yaml @@ -0,0 +1,152 @@ +# based off of 2020-01-21_07 +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 
'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 1500 + POSE_HEATMAP_WEIGHT: 0.03 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-22_02.yaml b/experiments/multimouse/multimouse_2020-01-22_02.yaml new file mode 100644 index 0000000..0e88cb3 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-22_02.yaml @@ -0,0 +1,152 @@ +# based off of 2020-01-21_07 +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 1500 + POSE_HEATMAP_WEIGHT: 0.02 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + 
BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-22_03.yaml b/experiments/multimouse/multimouse_2020-01-22_03.yaml new file mode 100644 index 0000000..ba01ae8 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-22_03.yaml @@ -0,0 +1,152 @@ +# based off of 2020-01-21_07 +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 1500 + POSE_HEATMAP_WEIGHT: 0.04 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-22_04.yaml b/experiments/multimouse/multimouse_2020-01-22_04.yaml new file mode 100644 index 0000000..2c2419e --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-22_04.yaml @@ -0,0 +1,152 @@ +# based off of 2020-01-21_07 +AUTO_RESUME: false +CUDNN: + 
BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 1500 + POSE_HEATMAP_WEIGHT: 0.06 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-22_05.yaml b/experiments/multimouse/multimouse_2020-01-22_05.yaml new file mode 100644 index 0000000..6ecdf0a --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-22_05.yaml @@ -0,0 +1,152 @@ +# based off of 2020-01-21_07 +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # 
TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 1000 + POSE_HEATMAP_WEIGHT: 0.03 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-22_06.yaml b/experiments/multimouse/multimouse_2020-01-22_06.yaml new file mode 100644 index 0000000..51871ef --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-22_06.yaml @@ -0,0 +1,152 @@ +# based off of 2020-01-21_07 +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + 
STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 1000 + POSE_HEATMAP_WEIGHT: 0.02 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-22_07.yaml b/experiments/multimouse/multimouse_2020-01-22_07.yaml new file mode 100644 index 0000000..2663907 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-22_07.yaml @@ -0,0 +1,152 @@ +# based off of 2020-01-21_07 +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 1000 + POSE_HEATMAP_WEIGHT: 0.04 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + 
FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-22_08.yaml b/experiments/multimouse/multimouse_2020-01-22_08.yaml new file mode 100644 index 0000000..3a5be8f --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-22_08.yaml @@ -0,0 +1,152 @@ +# based off of 2020-01-21_07 +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 1000 + POSE_HEATMAP_WEIGHT: 0.06 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-22_09.yaml b/experiments/multimouse/multimouse_2020-01-22_09.yaml new file mode 100644 index 0000000..ddba562 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-22_09.yaml @@ -0,0 +1,152 @@ +# based off of 2020-01-21_07 +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + 
NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 750 + POSE_HEATMAP_WEIGHT: 0.03 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-22_10.yaml b/experiments/multimouse/multimouse_2020-01-22_10.yaml new file mode 100644 index 0000000..f8ab9ed --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-22_10.yaml @@ -0,0 +1,152 @@ +# based off of 2020-01-21_07 +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + 
OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 750 + POSE_HEATMAP_WEIGHT: 0.02 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-22_11.yaml b/experiments/multimouse/multimouse_2020-01-22_11.yaml new file mode 100644 index 0000000..ec1f7e4 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-22_11.yaml @@ -0,0 +1,152 @@ +# based off of 2020-01-21_07 +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + 
- 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 750 + POSE_HEATMAP_WEIGHT: 0.04 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-22_12.yaml b/experiments/multimouse/multimouse_2020-01-22_12.yaml new file mode 100644 index 0000000..6c92cb9 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-22_12.yaml @@ -0,0 +1,152 @@ +# based off of 2020-01-21_07 +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 750 + POSE_HEATMAP_WEIGHT: 0.06 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-30_01.yaml 
b/experiments/multimouse/multimouse_2020-01-30_01.yaml new file mode 100644 index 0000000..1441fa1 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-30_01.yaml @@ -0,0 +1,148 @@ +# copy of multimouse_2019-11-19_1.yaml + +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-30_02.yaml b/experiments/multimouse/multimouse_2020-01-30_02.yaml new file mode 100644 index 0000000..d7959bb --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-30_02.yaml @@ -0,0 +1,152 @@ +# copied from multimouse_2020-01-22_03.yaml +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + 
NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 1500 + POSE_HEATMAP_WEIGHT: 0.04 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-30_03.yaml b/experiments/multimouse/multimouse_2020-01-30_03.yaml new file mode 100644 index 0000000..7e20dfb --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-30_03.yaml @@ -0,0 +1,148 @@ +# based on multimouse_2019-11-19_1.yaml + +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + IMAGE_SIZE: + - 384 + - 384 + HEATMAP_SIZE: + - 384 + - 384 + # IMAGE_SIZE: + # - 256 + # - 256 + # HEATMAP_SIZE: + # - 256 + # - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 
1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-30_04.yaml b/experiments/multimouse/multimouse_2020-01-30_04.yaml new file mode 100644 index 0000000..3891c9c --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-30_04.yaml @@ -0,0 +1,148 @@ +# based on multimouse_2019-11-19_1.yaml + +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + IMAGE_SIZE: + - 384 + - 384 + HEATMAP_SIZE: + - 384 + - 384 + # IMAGE_SIZE: + # - 256 + # - 256 + # HEATMAP_SIZE: + # - 256 + # - 256 + SIGMA: 9 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true 
+ SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-30_05.yaml b/experiments/multimouse/multimouse_2020-01-30_05.yaml new file mode 100644 index 0000000..e5f19d9 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-30_05.yaml @@ -0,0 +1,152 @@ +# based on multimouse_2020-01-22_03.yaml +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 500 + POSE_HEATMAP_WEIGHT: 0.04 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-30_06.yaml b/experiments/multimouse/multimouse_2020-01-30_06.yaml new file mode 100644 index 0000000..3e42de5 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-30_06.yaml @@ -0,0 +1,152 @@ +# based on multimouse_2020-01-22_03.yaml +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + 
PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 500 + POSE_HEATMAP_WEIGHT: 0.06 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-01-30_07.yaml b/experiments/multimouse/multimouse_2020-01-30_07.yaml new file mode 100644 index 0000000..6566104 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-01-30_07.yaml @@ -0,0 +1,152 @@ +# based on multimouse_2020-01-22_03.yaml +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 
'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 500 + POSE_HEATMAP_WEIGHT: 0.08 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + # WD: 0.0001 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-02-03_01.yaml b/experiments/multimouse/multimouse_2020-02-03_01.yaml new file mode 100644 index 0000000..7b6fe09 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-02-03_01.yaml @@ -0,0 +1,147 @@ +# copy of multimouse_2019-11-19_1.yaml + +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: 
true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-02-03_02.yaml b/experiments/multimouse/multimouse_2020-02-03_02.yaml new file mode 100644 index 0000000..89a7c4e --- /dev/null +++ b/experiments/multimouse/multimouse_2020-02-03_02.yaml @@ -0,0 +1,147 @@ +# copy of multimouse_2019-11-19_1.yaml + +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-02-03_03.yaml b/experiments/multimouse/multimouse_2020-02-03_03.yaml new file mode 100644 index 0000000..2e6b535 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-02-03_03.yaml @@ -0,0 +1,147 @@ +# copy of multimouse_2019-11-19_1.yaml + +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 
+PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + WD: 0.0002 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-02-03_04.yaml b/experiments/multimouse/multimouse_2020-02-03_04.yaml new file mode 100644 index 0000000..b247ddc --- /dev/null +++ b/experiments/multimouse/multimouse_2020-02-03_04.yaml @@ -0,0 +1,147 @@ +# copy of multimouse_2019-11-19_1.yaml + +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 
'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + USE_TARGET_WEIGHT: true +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + WD: 0.0005 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-02-03_05.yaml b/experiments/multimouse/multimouse_2020-02-03_05.yaml new file mode 100644 index 0000000..b254d83 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-02-03_05.yaml @@ -0,0 +1,151 @@ +# based on multimouse_2020-01-22_03.yaml +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 500 + POSE_HEATMAP_WEIGHT: 
0.08 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-02-03_06.yaml b/experiments/multimouse/multimouse_2020-02-03_06.yaml new file mode 100644 index 0000000..d4cdaf6 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-02-03_06.yaml @@ -0,0 +1,151 @@ +# based on multimouse_2020-01-22_03.yaml +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 500 + POSE_HEATMAP_WEIGHT: 0.08 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-02-03_07.yaml b/experiments/multimouse/multimouse_2020-02-03_07.yaml new file mode 100644 index 0000000..b54cf1a --- /dev/null +++ b/experiments/multimouse/multimouse_2020-02-03_07.yaml @@ -0,0 
+1,151 @@ +# based on multimouse_2020-01-22_03.yaml +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 500 + POSE_HEATMAP_WEIGHT: 0.08 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + WD: 0.0002 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-02-03_08.yaml b/experiments/multimouse/multimouse_2020-02-03_08.yaml new file mode 100644 index 0000000..8ea2072 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-02-03_08.yaml @@ -0,0 +1,151 @@ +# based on multimouse_2020-01-22_03.yaml +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 
'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 500 + POSE_HEATMAP_WEIGHT: 0.08 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + WD: 0.0005 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-02-03_09.yaml b/experiments/multimouse/multimouse_2020-02-03_09.yaml new file mode 100644 index 0000000..5ce923a --- /dev/null +++ b/experiments/multimouse/multimouse_2020-02-03_09.yaml @@ -0,0 +1,151 @@ +# based on multimouse_2020-01-22_03.yaml +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + 
NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 300 + POSE_HEATMAP_WEIGHT: 0.08 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + WD: 0.0 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-02-03_10.yaml b/experiments/multimouse/multimouse_2020-02-03_10.yaml new file mode 100644 index 0000000..9b810ad --- /dev/null +++ b/experiments/multimouse/multimouse_2020-02-03_10.yaml @@ -0,0 +1,151 @@ +# based on multimouse_2020-01-22_03.yaml +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 300 + POSE_HEATMAP_WEIGHT: 0.08 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: 
false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-02-03_11.yaml b/experiments/multimouse/multimouse_2020-02-03_11.yaml new file mode 100644 index 0000000..a010288 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-02-03_11.yaml @@ -0,0 +1,151 @@ +# based on multimouse_2020-01-22_03.yaml +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 300 + POSE_HEATMAP_WEIGHT: 0.08 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + WD: 0.0002 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-02-03_12.yaml b/experiments/multimouse/multimouse_2020-02-03_12.yaml new file mode 100644 index 0000000..6bdcc67 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-02-03_12.yaml @@ -0,0 +1,151 @@ +# based on multimouse_2020-01-22_03.yaml +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: 
false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 300 + POSE_HEATMAP_WEIGHT: 0.08 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0005 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + WD: 0.0002 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-02-10_01.yaml b/experiments/multimouse/multimouse_2020-02-10_01.yaml new file mode 100644 index 0000000..2d6d4ab --- /dev/null +++ b/experiments/multimouse/multimouse_2020-02-10_01.yaml @@ -0,0 +1,151 @@ +# based on multimouse_2020-02-03_06.yaml +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + 
EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 500 + POSE_HEATMAP_WEIGHT: 0.08 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + WD: 0.0001 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-02-10_02.yaml b/experiments/multimouse/multimouse_2020-02-10_02.yaml new file mode 100644 index 0000000..e758994 --- /dev/null +++ b/experiments/multimouse/multimouse_2020-02-10_02.yaml @@ -0,0 +1,151 @@ +# based on multimouse_2020-02-03_06.yaml +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + 
BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 500 + POSE_HEATMAP_WEIGHT: 0.08 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + WD: 0.00005 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git a/experiments/multimouse/multimouse_2020-02-10_03.yaml b/experiments/multimouse/multimouse_2020-02-10_03.yaml new file mode 100644 index 0000000..91e40bc --- /dev/null +++ b/experiments/multimouse/multimouse_2020-02-10_03.yaml @@ -0,0 +1,151 @@ +# based on multimouse_2020-02-03_06.yaml +AUTO_RESUME: false +CUDNN: + BENCHMARK: true + DETERMINISTIC: false + ENABLED: true +DATA_DIR: '' +GPUS: (0,) +OUTPUT_DIR: 'output-multi-mouse' +LOG_DIR: 'log' +WORKERS: 24 +PRINT_FREQ: 50 + +DATASET: + COLOR_RGB: false + DATASET: multimousepose + DATA_FORMAT: cvat + FLIP: true + NUM_JOINTS_HALF_BODY: 6 + PROB_HALF_BODY: -1.0 + ROOT: '' + SCALE_FACTOR: 0.3 + TEST_SET: 'data/multi-mouse-val-set.txt' + PROB_RANDOMIZED_OCCLUSION: 0.75 + MAX_OCCLUSION_SIZE: 150 + OCCLUSION_OPACITIES: (0.5, 0.75, 1.0) + PROB_RANDOMIZED_CENTER: 0.1 + JITTER_CENTER: 0.1 + JITTER_BRIGHTNESS: 0.5 + JITTER_CONTRAST: 0.5 + JITTER_SATURATION: 0.5 +MODEL: + INIT_WEIGHTS: true + NAME: pose_hrnet + NUM_JOINTS: 12 + PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' + #TARGET_TYPE: gaussian + # TARGET_TYPE: exp_decay + TARGET_TYPE: point + # IMAGE_SIZE: + # - 384 + # - 384 + # HEATMAP_SIZE: + # - 384 + # - 384 + IMAGE_SIZE: + - 256 + - 256 + HEATMAP_SIZE: + - 256 + - 256 + SIGMA: 6 + EXTRA: + EXP_DECAY_LAMBDA: 0.1 + USE_NEIGHBORING_FRAMES: false + OUTPUT_CHANNELS_PER_JOINT: 2 + HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5' + PRETRAINED_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + - 'transition2' + - 'stage3' + - 'transition3' + - 'stage4' + FROZEN_LAYERS: + - 'conv1' + - 'bn1' + - 'conv2' + - 'bn2' + - 'layer1' + - 'transition1' + - 'stage2' + FINAL_CONV_KERNEL: 1 + STAGE2: + NUM_MODULES: 1 + NUM_BRANCHES: 2 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + FUSE_METHOD: SUM + STAGE3: + NUM_MODULES: 4 + NUM_BRANCHES: 3 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + FUSE_METHOD: SUM + STAGE4: + NUM_MODULES: 3 + NUM_BRANCHES: 4 + BLOCK: BASIC + NUM_BLOCKS: + - 4 + - 4 + - 4 + - 4 + NUM_CHANNELS: + - 32 + - 64 + - 128 + - 256 + FUSE_METHOD: SUM +LOSS: + #USE_TARGET_WEIGHT: true + POSE_LOSS_FUNC: WEIGHTED_BCE + POSITIVE_LABEL_WEIGHT: 500 + POSE_HEATMAP_WEIGHT: 0.08 + ASSOC_EMBEDDING_WEIGHT: 0.001 +TRAIN: + BATCH_SIZE_PER_GPU: 32 + SHUFFLE: true + BEGIN_EPOCH: 0 + END_EPOCH: 2000 + OPTIMIZER: adam + LR: 0.0001 + LR_FACTOR: 0.1 + LR_STEP: + - 1000 + - 1750 + WD: 0.00002 + GAMMA1: 0.99 + GAMMA2: 0.0 + MOMENTUM: 0.9 + NESTEROV: false +TEST: + BATCH_SIZE_PER_GPU: 32 + MODEL_FILE: '' + FLIP_TEST: true + POST_PROCESS: true + SHIFT_HEATMAP: true +DEBUG: + DEBUG: true + SAVE_BATCH_IMAGES_GT: true + SAVE_BATCH_IMAGES_PRED: true + SAVE_HEATMAPS_GT: true + SAVE_HEATMAPS_PRED: true diff --git 
a/experiments/objseg/objseg.yaml b/experiments/objseg/objseg.yaml
new file mode 100644
index 0000000..875df06
--- /dev/null
+++ b/experiments/objseg/objseg.yaml
@@ -0,0 +1,134 @@
+AUTO_RESUME: false
+CUDNN:
+  BENCHMARK: true
+  DETERMINISTIC: false
+  ENABLED: true
+DATA_DIR: ''
+GPUS: (0,)
+OUTPUT_DIR: 'output'
+LOG_DIR: 'log'
+WORKERS: 24
+PRINT_FREQ: 50
+
+DATASET:
+  COLOR_RGB: false
+  DATASET: OpenFieldObjDataset
+  DATA_FORMAT: cvat_seg
+  FLIP: true
+  ROOT: 'data/hdf5mouse/CCF1_TrainingFrames'
+  CVAT_XML: 'data/hdf5mouse/CCF1_Object_Annotations.xml'
+  SCALE_FACTOR: 0.3
+  TEST_SET: 'data/hdf5mouse/CCF1_Validation_Filenames.txt'
+  PROB_RANDOMIZED_OCCLUSION: 0.75
+  MAX_OCCLUSION_SIZE: 150
+  OCCLUSION_OPACITIES: (0.5, 0.75, 1.0)
+  PROB_RANDOMIZED_CENTER: 0.1
+  JITTER_CENTER: 0.1
+  JITTER_BRIGHTNESS: 0.5
+  JITTER_CONTRAST: 0.5
+  JITTER_SATURATION: 0.5
+MODEL:
+  INIT_WEIGHTS: true
+  NAME: pose_hrnet
+  # we're abusing the NUM_JOINTS parameter to get 1
+  # output map from the model (segmentation confidence)
+  NUM_JOINTS: 1
+  PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth'
+  IMAGE_SIZE:
+  - 256
+  - 256
+  HEATMAP_SIZE:
+  - 256
+  - 256
+  EXTRA:
+    HEAD_ARCH: 'CONV_TRANS_UPSCALE_5x5'
+    PRETRAINED_LAYERS:
+    - 'conv1'
+    - 'bn1'
+    - 'conv2'
+    - 'bn2'
+    - 'layer1'
+    - 'transition1'
+    - 'stage2'
+    - 'transition2'
+    - 'stage3'
+    - 'transition3'
+    - 'stage4'
+    FROZEN_LAYERS:
+    - 'conv1'
+    - 'bn1'
+    - 'conv2'
+    - 'bn2'
+    - 'layer1'
+    - 'transition1'
+    - 'stage2'
+    FINAL_CONV_KERNEL: 1
+    STAGE2:
+      NUM_MODULES: 1
+      NUM_BRANCHES: 2
+      BLOCK: BASIC
+      NUM_BLOCKS:
+      - 4
+      - 4
+      NUM_CHANNELS:
+      - 32
+      - 64
+      FUSE_METHOD: SUM
+    STAGE3:
+      NUM_MODULES: 4
+      NUM_BRANCHES: 3
+      BLOCK: BASIC
+      NUM_BLOCKS:
+      - 4
+      - 4
+      - 4
+      NUM_CHANNELS:
+      - 32
+      - 64
+      - 128
+      FUSE_METHOD: SUM
+    STAGE4:
+      NUM_MODULES: 3
+      NUM_BRANCHES: 4
+      BLOCK: BASIC
+      NUM_BLOCKS:
+      - 4
+      - 4
+      - 4
+      - 4
+      NUM_CHANNELS:
+      - 32
+      - 64
+      - 128
+      - 256
+      FUSE_METHOD: SUM
+LOSS:
+  USE_TARGET_WEIGHT: true
+TRAIN:
+  BATCH_SIZE_PER_GPU: 32
+  SHUFFLE: true
+  BEGIN_EPOCH: 0
+  END_EPOCH: 600
+  OPTIMIZER: adam
+  LR: 0.0005
+  LR_FACTOR: 0.1
+  LR_STEP:
+  - 400
+  - 500
+  WD: 0.0001
+  GAMMA1: 0.99
+  GAMMA2: 0.0
+  MOMENTUM: 0.9
+  NESTEROV: false
+TEST:
+  BATCH_SIZE_PER_GPU: 32
+  MODEL_FILE: ''
+  FLIP_TEST: true
+  POST_PROCESS: true
+  SHIFT_HEATMAP: true
+DEBUG:
+  DEBUG: true
+  SAVE_BATCH_IMAGES_GT: true
+  SAVE_BATCH_IMAGES_PRED: true
+  SAVE_HEATMAPS_GT: true
+  SAVE_HEATMAPS_PRED: true
diff --git a/figures/hrnet.png b/figures/hrnet.png
new file mode 100644
index 0000000000000000000000000000000000000000..ac8bb768448134569aea762da74ac4e463069ad3
GIT binary patch
literal 29707
[base85-encoded binary image data for figures/hrnet.png omitted]
zVElC@7%Yz4U&0TECJEiOyGFycmV+;@h_Ob}`~H0#u|L=(GEg++H{?VEGWQ~rr&2ozQg2XJS_a3p#(lTEou;9PwSS??!zgKn+xE`Z#~hZzy0Q4VcEKa(g6f80%-F zF!8)gKKZzHLbG@lQv6G56~f_OKE!8O!3-w_7Q9c!u6r0T+wsmjB=jZ+Op!19$@?Ag zx7$?lW0|ki583J6u$xzzH=}ghdvR{Um4KztzGDW)|7M0sYx>U$dBOi4o8@K3Ctip(>95nUhC|mjBc^&(Stezg}NwAe zQdf=o;5l7m1^Mv1FMF9SX1rGxXp8SaTGy89F!+XS>0?)J)(tXE1w4x-#`DDZ)5G$| zu2ACfSCKN?V8`()qXr+2%hb6>2L8-f1E9%iyvnY@2W4{2O1C`vhE7lLRm63?>D->1 z;LyB*73;BSu2F_PlS+L3AkXffsr=``l{j+is6V8GTI6`I$43bN7XXxNhudlodCtN9 zU!zz+O#cfDqU%KXb~|<&F%7WrI0oSV`G4@r;oJvnHnj%Tyy)It%xCzc@z#yO#^#`@ zvsh~TO?uIHYT+LyYb^ZGzE2J|x@Vxt%xTM`!M6oick&LJy-R3%tBNqK%=r6Z-#w{S z#;^=z)NL-ZxQ%r;lu?Kq@mUOuAtHhIl4KV`aABfrlb^ydEIy*CJB_S`PJiqiZAin( zRtX)!r9=9Fg4xkw$nAPcjF^z9em^isWxu<*DoXwD{V(8s|8C>!4IC4br56?j|NLTX zqh+4VOcACACe?aXXVQxi@TLuTLv+aOCz-)xTkY8&26g!)Zujk|Y zH9eg+iCNQTt=4R3DT}{>$!y&f`uuxxLO4Tz^KdFL7Ea`8O0f8Ig-)sM&3?4G;O(SO z)2XsTI}X3^^C{e5Ox7`6`E7wmt6J51M4sm|yW5A2*KvNg^JfZZ zy(0SE@mwb7w@H(ZQE-dxYi}^K`0JyI_54y@fPEJ__}kpCVfG232U9j9POI5^xT7Q9 zc?v=I@`8+=5V(stUU&uK+p!TQ<#p%&XC>3VkOp3Ifkxp`Q+_MXF3hKrk8K=}%Q57QD7xQD z32}q z;k>r*2_nSv(GzI_q1RJ2KZi5EPm4(pluZ4% z$xm@Ku`2{1R8+t8WL5}mA`B73m$LOWupE5)JUd*z#U`=6ICElDdV z3*s*J76Qvhy_X0dcAlLrR$ef~WpI@oo)DRk8yy!mzE(+fC$vXTL>|@!GmGFD$%Ai? z?=D$Y@iu$=_dj*GU5qzL$oiw>x8;wp$ieBQN-X1nIy{BE<-rJ%91aX}3PlC5tO_>a zc-D|0$<&YdfDlvuL=24*!?UGSs~nLD)OZY7lK2jS{Jg1fVSbH9vVUqftW2_f41F+SsZSW;K@qz^>Q*{czO0p`@pd`Ts-hD5U{kcZ=)q^64!Fz86ia-iT>P0U%E{e^5eBqu(IwT!IDPD6DkVPH zXnF1R1}A1{R_a}8=>6G=jg|4a?16|QVV}P_Qr`K1vs7(#mbBr`)#>p&!pwdNaD~fR zz<4OGcN1d~+FxpF-i!FXzJN6d-+^0_Ia@;$=t_%LF+U6RKdeQMP0gV-$hH4az9OSSM zE!JC({?V=*kkT=aK%{j!ky0k%+K+~%5NPZ)d#$%b=WMiF^Kn|P$Bsa1ePL5+Hya&b zCT7nj(*5;0T$hZ>*InlLa_i+2m1juXM`E?4)5DUuK-uAss+h~VBVxNzK7+y zB^}#fW@v22Xy3J`6YS1oe8X9&nJe;{7I4%=i=S_`B!=;ooiW`ZJKM8z;ZqXf4KF?K zvzZ?MwGGsu^p7osV}3Rr3V`nvIPkkGns@ZG6qh zFLb*gj-r*LwFyrcq@v1`2)7^NJMi-8U@{HV(=DulG7-W>2fUlmQ39h%_m}bm3P3lM`p^HqY)TiPIV+?=d z>k3O~w9W9dWtH>1br=3`&q>6<^_DZvovf-qC1ee^WWXdn{~Nm?Nj_O2c9dD}?~^$A z2h1Fx494DmqMB}NC=YF@IG|%co#yXyk?+0)DuYRA7zUCVk7f>nXa7y+oNHGdo)AGe z+TQ?-oxS(zr8ju=_VCJkdBGqywwk~!AWR!vl9bTxSF%u%p$R5oQdvOE3`^kY)M7$~ zMLOcvq8>>7X2w;$upvdAzk7g)&ai6nS=IRVlx8L0RKhBSP0C?okqETb=7z$rdfg4r zdf@l8cj(0;DcfW>k*X;N_g7Z6R4v%NLU(W`hXrn1Zu1ejWX^+{}&kq zugoF3F^~cKdpqM-|OE?e{4ANeOSb z1{i@z4#cB_0=Er{`=}|&aMd+sHbYG?2AR3HDqIhfLirOqWATo=>l!Z-E=US{_KS-K zMj+ioG#)4*7!MSZ=dPzW#*R*`g!Mrc5A9igq9-tGv|FmoR?$DuF3LoGVh7AdlRFz~)QiO| zKZV?4+}y~8qiW?X7(R)mE*3;b4z3V_fCTvA2V>2FFJ~O1DzF|=AWE97G}_wD_}rW| zC1TLXMd?}mCNws-ihX+M3SGegWpG-Ik(kM-8e`372?_XxCFFhb3U$4J8XAc#Bhe~A z=dC&DiqX9SE2s?lHp{bi(E0H|pF;1kZbKuc)|7vZM~DHbDvfLK^B-n81}QCHsA&J8 zi-umFQQxx3x{uo<*IKKPdl~yuyNA4^#PA{Q{sdLYB0fk;`^X_clo-^Czv$=D`4fL~ zb>Kd*vDO~Uc=bk1&jQpeA-hzosu;y4cRHty%A-nqp3$CfdUqTRL@`X#S<(yx30WJP zv16c7#;Q63Z>S51M~~A3jh76H$BI?jbvirP!l~|a?N__QKQ4ioO-RCD@7DdQ+CMP= zD#+ieLnjYW&Xhs%+YH#>0<`&VS9zr#hnV=r%-MPF??sp23Up^sIn?dc-QaPvM+no< zFX+wG)IQ!=P$#B$}=>sV5|yI0H8)Ln^QGE`w9(`W?#!3U$##!|CvL0Hl~?C&R2zRYk+CV6 zZ)eqGi4WIbdPH0Oe!#@S$lsDEpyES*z`Tx*kF)Vw&rAPiaZ^a;$awb!8o&3pO^^1& zhx)LFmmFXIAX4sg)ZNZcO(^K-(bbK9r;8J%23(%?8s1qNc)$OZzt@ONb(*8s>76Fp8nC>#?sd@u{Y_l(PRs6O(LvO)#nBD%8KIw0 zp(u{izVpdu%KQ)M{<5%Pw>6WLdx?cb#z^PO{f(rF$;rX8@b^vYQmN_M&|s)qZW{dA7_AcO~#etttS{t7E(?)xQai07d@3iRQl>5CR8-VaV4YCZqg+I7=|C z;eVh7@L$R9-2a093ND3DW|J|uL|qMa z@S{{E*??+<9rMus3`izf@YVbV_uRK39k{EgQXAOIyd!PMMc?tTC@<^y{&b$1{7pdutD(vcXK~C3@qMGWF z^xLtfb^B;&H`XrXLF6rZ30t{$gTwQGygBf(@YB)J!Ch_Y;%M=MqKbMOZ~w-NQF0E+ITHb= zy3>qAU)jYcBuJ?xXh7_nt-F{B5Jq;}xBY3lEjK4dQ>vP0Y9K`6;lwhEicX%ZOLYgI 
z;x5Vsa^{wydAo~#cUHC14}DGt9E6yaSs=oEe(ht+o*W%`Y_E2C*#2dkXjrh&|w38ULk3ryH$__9oL3nA4u{GpMSb7h1QjwkMe zJ!>6Uc35KEyGC3Rbn@7k%2u4_m9+q7&B~z}<%cQf)72JvrOb~G=B;EYmhkOSkB?)= zPAuPP`CE@^1+^+E1)JX{zMYu6ebf2Z>dJxl%wGOH=e4TaqB}gDyNqoB9&*ugV3+Ny zvr=V$e|iy{Bbc@s^FrZI93Zjzv6e;P%}4DrjRuhWglBc?yVr9=Zy$HZDKLm?UNGC+pb~M^s!>yQ#cS%!-4o2Aw=!(}jHU0e67)Gq;?* zqCFzXlmXY>cCwu{T81Jb$q0$bCVSq{!I#Uv<5MQ}{nxNni}v9aQ8dT84b(9)4aJCs zGU7AWo1x1~xbzFlu9y3TpJyu%tq|TPEMPp0#{z{kHDw}JQoe;h?=2T9`7YYIl7ZW| zYQBU+=wPMp!-6C>;J*CZzc@EUH0U0BWNBz<7(TQHQ~5snHeqt4DJW#{yk53A9jv{S z(}XdPEVcgykTAg1)KHop2AGHkB;6$ZecHTiB|| zc05K3TwWec$l-8EIlomJBMUqdbz)GcKcZGRo=R{380seX^IIvFa752m_wo1rzEy%_ zZO)d1wPx1s@9WbFx=%>tqqH1J+~KqO!?<7my6m)F_+Af8o^xQPMvq++!FI$wrWfLY zBHE~$HwDJ-PiUoBEZ6vBuJ=Yctmnl$LS0jcN{DFm+#VDase}!q!9|8_T z<5LB1*Ia!L$6PlzhTg4zH%-tcpr)svW$cw;-l?IJVo~j-ztjpbo!8pdOj&t(;N5xH zXnLIa;jQ@X8zTjOy)lrDp*+78P^!rfqY(7r^)N6oxg-WH>~Sf-`U{*h_ICP&awA^- z4I>S+-qaxf{O#MJ$7>osiSy}VI-qebvk^hR4C7(lZolfVJ`~iRUD@0!Rn^{p5{y0w ztQJ7JnCY*wQtK!_HSgc+ef=K zx>=+YwzjsApT8-Ks?>j@PfJT9VAcTlg~FTXxpF0hJ4eRF;RvGM-qFx>2YF%^{Rn^m zh@l;ENA?17GdW)cK|j!J%rspTjODx*bp7GrB25is8Ksm6yt@{{7V#E=-2ZlhK7-i4 zwx8NNi6Y3_??SJ*zIUqRUD83hnK(+cYFPAJ>rJkA{lmGngAhW`KdE)h)Ya8}T2q9* zfB)2~sl>JU#*Yk_x;+*l;_r`=5>5ZRZi&t1UO}x6{-NO3x!3`>7r%4%lvR%S7)!b4*R#BOvIv3U=Ffy+}4#B`r1D z#&-RAKZEZvt_Y(8fIgN6HO5fJvpvqGZu$-+t_TjM2n57CQ8_v0O?~>{aSP|O<(R!u zc!qHrgX_~evnbqy9l~G9$lcgb~iCixk!8E6`Kx=e%XtS|9MJ{4#fUKl`Ptk~2oWT$pqg znG8a-YK(>0J3ofWCO)4t97fm6su=ep1)~$wa&aXBr>R=k3bv)8>`RPGL^Zy@79uO# zno+b0nYb~xm(>JsU+LWT`urJ$g!*RwBJ;+BxvKaYKZ%PsL(Z>xf(&P7xQ50sP>6$c zSDWLN%=Z;Pt+pF@I~Y=SJ>H%04vpPvo1R7vN%XJ-xy=*5l;};|hi@tw6ax1S2U))v z^;_v74^-m~ug7x)21`^i%}dXXV`=H>M3WP#u4n0Wu4h!YM>E0pUHUP@U z>DtlI?sZgbCAz4i<2!BbrEH?j(%i`R2(cVXQ3N|rIJj?L#XFsU}?GFf-VDhxX~qWv;Pujtt%zP(BX+G@_n=WO7pHgxe5m&eJbL7 z*jy~)m!s0+C|Rl3BBeGhDJo%9^_A{^a4^Tc4>^TB^tSC%zWOAF0>U*ImDY;X5{^s{ z1m+(hA&xiZM13K5J2w+uoelPod!MwO!3n+a)w=dSIEDV11|)ora@#=U5WZiyYnZGx z!{@PE6_=4g(Qgy(#uh2>Y`EydehVTObT^HPRXD;Nlh&Z~A?40WPnU|N>~pK-BAZ)u-Py+xg zDw|%jH#5pSi`;z0%Ie;7gLPhi6}q z%#Ntru+*=)V{s1DM4WzJUfzQZe|v!oGgUaz060XoTgp0lV$?wHl`e;!d+DhEWVc~d zg;QyDmWAPWA@yDi&HsfqJ#maOR4aeRi|Wo1qh;WXECkp$uNAW1l|+l4T=#p5QXLbvHWEzn*< z7PTL=dDC^GT2MqP2ZnJzb`}wNeI|neq>$5VaePT$Yuo9%n%aZF?SpeUb}v>={OG>o zfLe_SjPN2@+!~ONXyJZ<+)H`mKt%b4bg+za-4jAudff=8lIAwN0mlAjZ`F0{nnfl{$$n;&=z~+w zM4T8(4dmoT;7RC{2NkNr@X#Ord?-XaiTv7m>m=MP#Rgab%rA`&$bPO^6@`LA`y#x9 z$cz81aqW5V&$oN=zw(67sEmj}UJs=5;MAp-8_|*B#A4y7SP?#Eiy4(&KBqsCDLzQ4 zW>=j;`~!nKFUtSN1+ZM~l>;AYdodCmp^0cz^+frj9*KHk9Su8xS9|+$5F#11yL_z= z0C@NX5iF&Khbe{=ffvV8P9dXH($tKt9et}U+&m>Y{4c6(cv_E*^!G2zzo^{fsj86iR0aDB7PyRl z08-X_K_cusHYs8d(LKB5h#s<_*gLr~Z^YZ1?Y$fr?>#4rjn2NbAf%4Q-l2R6I2UZe zK!VWrKH~=r!`u`$eRz+}L!uvtQGi`BjOV=e=}HlS(1}<)Ze~jhAL2&J%$2Cq6wNxE zEl)Og+X525wk9`8qK0v3U)(ZAGa7BfmrD$b`K|1{Gl%#=Y)9!d@~v1W2b9e%2M+Tk zPMetJM9p{0;y!=sI0|W|_Eu}R{kIq&NN2k-@KEpHKEoYJs7osU;GA8(6xqACo;RK>d&I0 zsgAZ92~=R_FJt+;O&g8Qz~sZ)Oix&;wl0e{^n0wEL!mAj`!s~~cljcliRSEpFeZcR zb|kuBtp)92abr1_ItMHov0uq)!P%Uts{!DzCbK1_;XtywY>bx-&pOTHmcfVv;m38` zca;1HlODyIzzu_$CahxwY8s=T+zr4#sPD9##Zd9taew6ZQwjxW!Ljr))6sE>%xf>f z)QeB!2>s;12^xFs7=gY>W3bA7nAKiF`%t^ggGEx5h)RLe^lQP-)nq_HF`f4!wCUlL z?_=M}!Znf-oJ$qFWO?IHn8EmqK*}iU3oBVK=W`2ivTN4)TFzRXQD1Usr9szuWTya) z%ugeI;Czn$F9+`?iqXi`BOEJA3B?vbZBx?%$Fha?-WaxG?R+$Sh0^`o z`T7yS3_smZqFs+O;K#X#L{O&Vz&T4s$ayBcHz{KHlXml;4Uz(u3O`rTsECYoWGMat z@3VDQ;3^))TNog}A5HB%`H=D0oE_4s!r|p`$yW7%Ii}L0q7*3oy!+$1t)a(62>B9a zFZ1%b_HZt@P2+q$aS3Nex#w)T3@uz@dV5&?bJYt$?=y{wKr4-B+zOJK@_I6sol&1n z8qCSF+fY4ef75p?zY)-q2uFID;k;I=<&4YSFViGv3*4t6FQfwvQ!D 
z0BSAx5gub|zFw$VWhCtjIlGW%H|T61+_-O!Kn`eSC{%3XL%FAWp7hZOPtf&_u;@u* z-`W#q1`bCpY047eX%Z5r+Kz9UBgP8XXfV=jU$^(4(n+|j`sF31Uo#@M2KJw*V$M%VFG5X8UDCh)^mx*|2Lun6-jm5JS)kk zyMw3EKzf_BJjf&qH=w-Uy`pL4a7rDMRZc24JNeK&_x#-^EZIs5ZNPvLTg(C~E>$D+ zYkr(p@d`p1gn_`}%Mnj%FMOaF_>iOof`1=mh(DA;yFiHjiWDRUb}t>5!=pvHaNWur z=}yXYa(PQ45f1oft-m-=qhI(*Pg0-%#9Fl&eu1>IVmhBwo{?m@DK@JEFT=iaZg`f@ z26a+g175`Hfp2|H)rXdga5ey|%E}fBMGS;6kWoOMfgk|eLkpa?W>~$3G$>%aR);*2 z93PB(sO8!L^n7k4iGBO0Ff-6aaEww1xxTSR2!B!L9IYm%89&YP!fggCG>TG)+Y$ga zUad`4IW1zOe{a;r&*|$<3~mH`v_V(Uy%p<^s@U8Oy~=gZ(*{5PJu!e1fy|w)g8}3+ z14tBr(cMbj$DwM!nNmZY|G|=`Gd_MHYJR5lzLRnIGWbO^zHz_d1Gz47*dV%MKY)@u zm)F%|S%!U!tcu4VWBq4{AXrNx5MYYg+tF7Y(J=w78Sz%Fjj2e2KaHZk7am_!x!x}z zy0+QrP^xavC{1@#=SCPK(lX%ZNyqM=S2%C=nlUJ6XE;Kilh{(&4em$ECE0816lvz~ z4cgt4j%Ehw0krYl`{{ZQp5MTc2ycvc`p=CQHDZ_@jH!ji!P67aEtq$m#D(?!m1B1# zp6)#Wz$GnwCG!Bizq;-tX44CMs6|!xR~_mb!=0eL0@)>IW|G^zXeTDlhy$nZC7k@- z<2f)KFkf3T>2lMBJhR*BqP70i9K|#)aS^M6%(6Z2rWdja_>=i9rCA4vp5Ew1d!ceo z>-)+4S{1<7ShQxN?fB@lPYwt)-VFurczsQFYd7~jLDF5bTX@Xz#@`zG_s^yD?0*mG zCiW*3#}!34qP(>aQKGb4Z-0~gAdp1P@BQe9zq1ny;hLN)(<-}(@79HK(5sYcR5+f{ zO42(1ah#r+p_|C$>tf;K^C;*#ot6XcqbmeVcvB;eE74>}l6ZLIuW^}~&!H)70$A>Z z6ZX5nk^U-g`*-9($d4bZR5{p0ldMH{GgWKT6jI-2xLh_nR3_+5Ro`<2zyOfmjCO}8Yf=-u%b zgRm%;RA#cB?cZgRj;9Zg_I#mvVJidcKg`jtMe84KNV9Gny_N)NY4eSvNt15+>obMC zpx$cJ!TxR=U`EyL^R^2%fo@L-fbS+IB(NPW^?zq^q^grNqxW$KXT3=Jzrw!O*L6{B zV>vE1S4l;C@k>e&j1ZD!CA$_ZhT~gwjtvb>dZ`_u<_-0mm-8aIxh0pWXiZZd=Fez- z@yB*1W$wZY5xhKoPq;UKN4Qu0S`Am~W>n>9?{X@_Nci^#Er$8V7X@5XcY#Ts^6eGo z!a4Pxm}b#U-7^XR^Mz1gg|D_Bk7bUHT_Q2iAF9dYlnZ?L)IzRrhC7+Hy78nUQO@!ZDfyJb z{PCPc5Y~O!O4p}QQZ&6ON^N%wMy0O@dLxrLB&4xofG{BNvFhElkxm|NgK_?3Bxm;u zlt(;1a=v?q=mObLz~{@BemeCPYRqHm>Wd2l4QwLX#x>17d@^hVoC(XiX7`iNg_)rJ zI?3!mdRLQ-L^8jAMZzIl?53~f#dEKH7}ZB|MwAajOGTHgx^X!jbi!M%L0$9Rp3YXc zMc>04KH9CP1m2>S<=j9C$p@VR!V(PSuP|xsk!_n)dnL9MeFt3qaO^MPH|@lJKL2P5 zhSZhhq;zr6JbabTf5A{8K}|k`_dbAcBk*{_9M*B_CorU*cHE?&&YYtBJ+vmxqJ`4q zP+cX>s2L-!+)jaT5)~k-EAmxDCG7`-+Pi0Ix7^@Fll-`JhQ`2T!p={lLN+jXgL^|~ z@M;oOrGrF>{|J;@iVEF)dM69{Z|nT^eOq0r6JtKasy||TMQ^jTz=4rt+&9RB;a%Zf zGRd7_a1+KmHER~xmW!%I0RmxD8m8dU;EAr=Ir)!)+6OT?CThZ`bAb^IRreGEQ|%4h zpg2*vwbsUWtHyle`qS`n3gljj3Sym3OML9X@Bn?(JwsxX&6^ll`0w3|(~y*S{x>DTbJ6=|56v@FPA&>$=ry+L$#Sc%)@Dgm-ucB}Uf4n0c@gEZlWNKKg zBj~tpefTc80=oPmKBg(O?9A0dEuQ4VshoT58~&zO1ho#Q6hr-tB*RjJR*gXI$~yp6 z8|zLJr<@B)YY(|fhjR2s&cepTw~-wanlg~s|(dl;u`KAQqCg1 zO@OwzRCURg;lI!K8Y|i=WZ&R)YOz|miaw2_ayK{?_HSG8WW=f~)d(K3zZZ95Is|pf zsALOuN<>UCc~2_G@01H7k#)hGIYLF?9(#Lg2IHKc+i-9bxAkY9 zl^8F7Wd|`4bQ%)|00tgb)_t;z^_45ItBVWIJ9BR{m1R>illWQ99ogx*SHAnh+d(@d zQWJq@wJm+Mc+`Y3dCZPE%k(x9}R1cCG2zazmL?=T1yMgDwQf zjJn4oNnriAN}0aBDXx}371HorE;^e27UoMvE}jNC{=HpN)2yVo7xB1O5Fu;Jfsvy5 zVrXI6zKcqYH|#9$+-KSp9$tFq`gCn}>MAJge7GnSicNJ9K8uX@5uh=2s&{%X-QV9+ z@jKTaZ}q*U1lbQLVip=ZI?xkpMXprf*NQ(Y7YawShku(cgtkxrlwUp#PfAbMxEzYd z|E-)OTpaoh4OV9h}~DUT2P}uG?Vc zI6vy@-z@XD*Ka_72F8;?ztTK4mwOixYLr(uY%~sLhKkMl= zUmkn9ywPOev@1>NvB|$(TQl(7@h!OD#IomEh#cd?%`^TthJ1jUaS}_Rn`3t3^tHYD z&uqPA-|T$FP7FVl9Db|$p4JY4m&dYzgfnkCg$5LUetrz)ovb-ZBvg`=)zed0UhiPG z$0E60^krW%j4og>oc~AwDlLU5C}!};tTfrr{eix;oz*!&GZxugDw9ZM>O`t`6u{+L z?;-nivCcBXLmy26&P~@OF;7b%m#Q(*m$bC2qQyne4ecz-kk6>xTzCs!JRQ*%UDkg9 zR%8&;oeUJ9+o=VfDJ zwgp%P^{ zC?Gks84?fG`U&V2FjiL8H!CP8R5SsEnmrY;LJrjE7qCwRVZfqGI3w{Ak4lDPfV$s# z<*tcCcF%n5s@ZAV&I`ZN|0W}_>D_%}H(0F2W+@35dt?<)oBD*|6J$sck#KtpUjz_= z3$@N43OK^4?U^B$sj2B$|7}P5o-bMNu8^K?R*kgBrU8GHNn%M!Njg9Pn+0g6imA=- z-nQ*Jh3Nv9#p2N~Bd6$UY6+lQ`s((GmS+G#d5q=$L+E8Lq%k^IqX|V7=(pa(ET#vZ<(EhoF zm!W80m%V-RwLvV%b7_o(l>asw+}wJ+F)%qNyAbHt>}mT8I$Jpy8X|Wm;uY2&Uf64@W?&^kheVb 
z;+};rsHZoLF2qH=iBQ80^BLLWgbs`_Kx3Ojh{jqjPGw~LLDg6h2q1s(!2YA z4BoGVpgfuVc)OsMz7M8P9Jtz^t5JnAzLB5X)EVpJqMtE6@DRx6n;q7ufQzM|9uw@5 zxhEP^RuyrI(?I0BNi|?!hmdpY+Ww*ZLdA?l5M=}&4lLi?c;`=<^6;`T$-!;+yA$cI zQVCR}A%tgQ0SG>BfMuks%)vKZ1{w@>( zF&#^NznsPho5tvUj!5}apo2qN)o4D z>5KrdRB;xbpp=F5^G_OAWp#>V_#54y=&QZzCa>ZW!#0I<<}SH0#ZpYyga>ijp=G@^ z<*Egq@<6!`VtBSZcO!_($;tU`qtoDcclZLaoDrZ}?bmwT2-{kZ3Ilix2j=}CZSY#9 zSI_u~%-)F9IhG+22(bAUojh?5&1%i6^;9}JhBs>9a=jL@GDcW;#Dnuj>t;<)Bc_DA zt$UvirL5zsj_2z*%}|V^FU#}LyiXen(b3VvzWZr!BU-&M?eYFI^RI_hp^QvnVWA=v z?^E5eeb=U9C0W^Bg3@H6GNhj-zR1+^j6l&y!>r!dyISh&rX-4UfCr_4B*@>bjqn z|Mb;~UM~GZsRf#|sDUTr?d^U9|O0#*zK za-kI==&cexfn;Eqt=aIABm>^i!hBL9z@OggWdd05#W~|LulFmcV(fA-<5}{*LU=@; z%`=Phmv`Lgzcj@$znv>pe`x@s$8#&4DnN%=6D0!K^WlU3q+8+xxzUnl=te=FDZoXB zqYUI$P?0lk(Y$^kQvew_$fqYDDOxbpxj-r#&Tz7iY?Wc_6yHKy>h|dWnYg*xteam5 zC>U#v7nViNd6befO(By%NreE{6+b>OG{)CA`;r?^`JS1Sfl5bWxY@zLl;x`+>-4;@ zA3h=BmpYddhSu74aebHAX+k?dH$K8KTo z@=O1|rI-+gm{LdLg$A_#N#~v=FKP3BszRLX)M>!7TyA-H;GtC)#2$lP-7=6JOOTKS`*W8!CZy0o)djgtU6s{xIg@rS+D85gpL{!zF&yQyYQ9@mQmOiZYUi{5J=|B@nl~#iI`!3LB_V|%fkKUn zrLK3ZE$mfkn{a1D|^QLK-iH?jRY4(brCU#ia`@W}K9awPFXL z@Fb+3?)U104#-dc;lr%?fssIYf1THnMfXh%7Dp@Nm6Y6#Mb4jmlfn7QR>=ogZYSsl z#vp9nyq=gwq&5FXhDpir^JWN*9Bc*-jpOk49UkoRv5pbS7`CwdKmRF^2q!e~zQPPb z;$zWuAUcvPbSoNH$nC125TlmMzi6ANVN4TTx&!aGIHg?n;R}!tj1VUKRQt_W;PpDQUgoRrArYmyRojy(8lH^=QZ>mooukhpT8yFot?k@xi2m%!lm+yI>`2#Udnr;abZnU zcd6X)brdNB*Qgez?7}mR6EqLwc6f6No`0-bCFP0dhv}czoIplS9E8fJR1zF6 z-vz+^OZOniTgCJ~WUA-Hy-`1xxxzXbZ#|RMA-zvJxTl{2lbtFWXG`%rs=sfEI{-9P z--rtQ#DsC;bP+%AEkNIox&Z0FB5tISa;K7m&%Zpi`^;E0B|hWJ+f8KbGqqjiYlI#m zW+H8yoq=Fi3n8GUu3Y)0tX2LEN@t<_l|Td(3TbjhXL3256an$K{*teomQCEhe7<>E zeQ!ba_j5>=|JbGD@9f_LJThr~PW2sfskbK&a&oy5c>X{dDe?vA{Eqkc4;Qai5zP7Y z0FUxZ84riU_G^pvbgYa%9;;T`#y^G?O7I^OCS0U(S-cnWYE{>V5=qD9 zCuLtU)%I3=?*=p7kbImhh=4Ntdk!J2QPlc3d}GhR`DRL2w${2?$*~kxuSo^x4gXA) zf9w0(*AFIG99=`Kn&G5z>nYt~$NZbaR&y`WYFWTFDnIWdIs=e-mpXl@fG;0B_-8Q- z+gsWSsmo#;gt+Oofbt5L?Q;Ltmeu+jL5NU*GlwsNw$IQ)GqZvcb=`6bu86CbA^mpO ze8FBTt=|oqn(ghy?ruF2l!rm(AHp1rMp!5wTs zP&6n1Owk{U&F;Zl5-U~@ftI!rf=woR`M1JcTz!eT%ef}dXR4#Q6C>qN50cXTW7(gg z__Rt$w3S(N*`#Kl1yNH=d(Sd7N!2ton;3*6yk!y87QbQO87?iFaw3*jtt>>5@dh2_ zJcOq(qoSHR^^3%5KVplL<#wjna$*1%gLtQ_gWBl{3*F|@(^u&k$(=gErWAbz@vYHC+64D+7${Sfs-(lkw7kcIcF>G5klE}Y9j-8as z3iy{F6B4Wd49c+g$CBWSGnHSu3L=Lza=L$VH(TTj&xz44FTkf6187do2ia|R1rLs^QyBV<|yZ^9h9iC0D zDu(RvEgC=g3QRZxb$;dTNK#?s4sM- zT;u>%ghu|kQjMx*&@V=TRvA83fuW#3HT7&Ei;Q=AsRKg&FISs1^QgUlX z>?H@+yJ`ons-u5xb=Ky62*aLa1^2BPc%tqie;d{hGhW_|mqaU=L_tkhjTjqYphL{M z-K3pF{%Fsc23IBV?Q1%mK)kSOVti}?FmNTNK2H*3VP<2!;ROGF~DUUAJ8^t(6NylDz%&h>vX%7MbMRcq>N%6$pCn zIrAwk7@$V;=OtS(@7p*t!@(R`S{5`bGxKyR$o}ctz;d}Bc6gWvUF6uTi}?p_K-H_L zhQh@s;gckQJx|$kqoy`JG&WY>Cc<@NE_VA=ZVu6fy`bJ(A>L??-DXS9x zy*Juuz3B1H@7*;0T|4?gA)QzncYNp`(`MYTsa1MhZiBS`R|N)q&B{fV81(;F(^p2t z(RAJ71c$+cyITnEuE7aDf#B{QG(Z^KCAbBGCb+x1LvVL@=Qhv#-L>Y=^mLuBDLr-0 z-n#)LqSd<##j`7S*YArrB0+*ccC0DsNELq(09ZR7%`*%pHVy5ZGSbkD;6CQXYJx8R zCdVQVm4CbGPv0ckZO#-GKAcEMm}X>+!2Sp5SGqVf6;vHn&&6QtZ$yNI=Zq)rI@P0&0B>$3*+kkHo9(w z1Hpk$=Kpe@OuuW8l9Z$Xf#OnxeF8?FfmGr|m#@~J(jz=%4NhHnu8B!-fk^y_ z`uB_~;C963?=ms#wAz57bNuJ2MVcy}?Jx=R#=_*fmD=gGJ~c7X7EX)1-+iCYWTMkp zaG(J}$Lk?&afV8ZV(2XFNs7g!puN#$Lq1<+K`(Um8cjd#T(#aIhtq>xF*mOSL}I&I zRq6K{8!q5ZN^pE~$L9it06Wls^hh38Q`-t|+3S&=psur6tmU&^yC^o*_BoLJbq(iqm0A7UmgN|Oz+>2cMKL7~dvza*r z=Rc9RY6y%(&ttB|h0VuPEn-DQp!qobnRZ1YL(Aj)QNKa6@0K^oyZ%<&Ci~K4E!h#c z-hAF$<#x-aTfMRKbT2a6QN!9)>w%yH5sWGw_Je0;(5rQc{@N2Z>gqoJo{Mn`*xLEs z?PQJ@JW&n-Aj8FQMH~15_rk`x*Py>RjmD31^*h-msa-s@tVOr!mU|7xK&Mx=B;>0c 
z0hR51&{LF4$1-7OA76!+?DZUav}GlM=scC7FmihFIng+`+Ab0bu9blbLKa$(y(GQ-bV$fWE-1T5{sQq z-XH$|WZjJQvWd?KFh2u<;HgZ4Jqg8rNe%oZT-vl)ng;_}>uY>0u1B^l#FBn8Nq{ox zx)m}vVdckRl63p=OlH)vqhGkYrs3be0I_z0eE48i3}+Gc_kUNJaEk#PBf~Gn)ZKm9 zG1788y~AJ=wfzhBTiBMEcCF=OHWl&+qpk-I#|^Da)JsgfjB%EJ@edj`vzhm>zi-}0 zJw+{UJ-wRjPtw>TWo7pn7{#Su+QWC7TPr0Fc5+Eou(KMqYjl&`QEt4R*!^uIn6$jK zU4zN}bqiB*KdTtr>5fB>*&B)lOTcrYV2}EWVzp8D7PSLdH^!$c9_~LZLZ4Hzjzhim zmsEaZt_+?KCm7Pk_T>~Npk$!ir^xi>1JzF!A<4D7fbrs6`RRmmRrvv zKg=wnPYf;g`7>UjQpU(d9m&ZX{IyUy>vC{faGcj^OL7)SUWbH!DbZqkA3xB2vW#eN z8y+o^cVMx-Q^#baVi}cyL|F`{wqtj%8)1 z8#1NS;@$SgZg&)x?QRjT?CCN~nZx&q`2(jtfUjmQ(W=0oJza1;GbITT4m6pAqgF2* zZL7DK7<6p+^;^sh1^~B&+@)mLw8IG9F9iyIB#?J-G9UXK9ZqTVD?+~EoC|+LriQuH z7yQCWNSG>k-W`7KeM>q~;x7I!E~fNf7lGk@xjIM!K;)IbPYns+*H?3(<8 zsc1}MqSWilt6;O+aR{lLoR-_eBB{O8M)!Ux06clEH61$n!J$$b^snG@RKU0j2=vtZ z_C>W|U14!EC3Clee!_QBaVB^QI|F9^6xCl?N)?xWcnvrj=~MY#UwXzwzML89E?k=jV7*uR9W?#fvDc1TbCFOg29KEYBk>`( zG!_kjrlHKjM9no>YoCjHKJT0DyZlbS|Ihi(9)O1(`F3sxO{W$E#S21 zQ-$J~HaD~X!4JlfKXRKJFLz7AZ*c?tb8h#rg`^}uBm5UaB=Ol!uY9_Ap@ppo*U1hJ z(se3TP}ns0lr4KOrG;v4H!r}zqJ8|^7s3akAzwD5)3#+gG8n3Me7gH@T$NQ}7d>K~ z!93KvRK_3{uAZy{7*f%PMa5p?rA zFgW;iJbNyRNfmVW_LL^`F{48O7k(_8FrNp@-j4!cK_T?eI+IzUI8@W$+hUcfh3P`2 zAgRZ))`_~@H#IWI%a+>+H`ZewGr^lbQsOwh(DDJp^jNAFy?W8;sFMvs7CQU)ZUTVNU;@EQb z5BkE5+&%!`fL9vXr6w3e8mH@}g9}4mo{EldlQL`bPf!m_buN5F5MBGIK zSaSdx=U)gu&3jl^+Xk267JL1&q(+=QlXGFvUUYJ$0}%2Ca0pT{Sj@R5&kt>0jH(-0 zQ!y)4;4VfLE$a+Wm4Zw(W373tuPK-=3z!hItQjG)J5eRnP`E5BuS7_dGt0r7G63Tn zb;=p&CHi-?s$V4^4b}k~@)4;}Izr>YNZKAwvLi6{nb86oZ|0G3#PuRkWSHr06_=P4 zvs%~G0OX;WwDV%Y6qcp*0s34FZaJPTMNOVvlrcU)+3&$%qM_Y#-Vc2H$D$C>ro)Aa z_zjS1d|gapY2sgB+_22^_sK;63TANRRGAwlpvIiVPL3*@hc~!CC^={^zNEAbxCT3FIC>@LDVP9{x82S#R+`E8amE&0+Jx9FR^^0wG z`fNPcw9Z5rm^A~nIf3q2 z+CBQCDlqd867$$U(#A6YniO+U_Dl8Gj#R?aRaKu*3C<4A(TKPaTj=FvWswhOth-ME z++l4>#O7DsI$oa#8|^6i{UG5Mdp8&iD%K1kyQshD43jnDh~9SRJ@~8nw#$vqN0~zB zc~``9S^%p~#C9l2DDU=IV5@W@^D{b?j*5{i389u{SiQ5-o(Tq2YN`%}&qK2VFWoO{ zdP$_Es1MwnAdexk5PF>2PzdA&ki}uNJvv@(x>ZR9{Bbt{?;CMZ;b|Z#eC>FBka$v6 z=Xbht_d%-y?D>&mJ#Zb?;BYQ_1wh&3Hf`7O*sthp0i7KJ7ezlH)#5ebr%?fJFqD=y zgNO$sgFCIvngIAeC7p7nt$!;TJPMS$|89UH&nVlwCw!gz3CZ7T?zNWFMJwj+ORCge zvu|E>qO6kAJpdcutf)7No&%CWR&)7s$I4-NHMXx4d4=+{baZTC(IV%KQOP8T-{7mM z?E^&2C3n(aQ6|%sof{p$Bh&!tL*3u8EzjOJ#LPbHoi&KCq5#|&*KW?+*g@h(;Eg~{ zVi}mwdW(Avtt{Ej+k&b(pHFV+hWL!X7(7ROPN9NJA7ZivN0rlF;)VVTxIb+12~Gcx z#~0$F-JZi^I7$Z*1#_NJqA`Z0LasV380oJ~XFr90!uG{{wVK0p1Vxy@8nv8}Z zcwtL8(#W+^Ugz)O{n;GY6n0r%(#sex&Y&A$BZy$AKBnVCrljO) z+_xl3%AQ0h)Dw`)5AI4YFs|@Sr;2l+sFGd(06##2UxOA3W5v&RNQ0tAT#a*2yLG6GdBo=Qn@>68D+9Y{CB|@uBbfC zH9$(v9<5ZL58#NelL7=feRAS~$%*$}hQF}Rg3#x*wsx}S*&bIh7n$^H^?8$;qurMS zU&wJBJptJ2e>C$#;mU8PNU8l2BQu@Wi~a-{6=nSjd}|RIK4!^LuGa>7ti%elFt>+a z4c1~Jr{N~y0f-RrFtQivES)?7R+38z72J{O=#6$Zehb$v@!?Iir^Nx55@Z?OhaKd&T=`lE~7Z7OwlmGLNMO zMp?XTY>|D^Fv^sFX~!%DcLh}z(3)Uyu_yOb@ZTi% zU9Q?$gU3m=%g*QP0;2szVo4Qr4-@nyi*FEMX$=ehKhghXm#G`qIQ&>P8kdDN16vfg zC1S8|37XoPx14a2W4>9JJ#I;dKkAOqW1QtMkodob?R;H=Qql#~;XFkHtywcZhF{{8 zUt+8!glr5W_8*IFKjii6wS&^FH!&~-1xEe-^uH0x@lK4?5MLX z{)$YC7&I;xuH^Q(dsNvU`vG55wK}Pj0pbqsF9O3xV+DH+Iva5toj=fEy;_}(D`*D z{*IMcgeQ+V!^8U6X)RkmN{i`Fi1J&BK9+W&eocq6(*}J*Y||e3M9Ya>sjP?V!`jF? zWmQ$97Yq>_j}U5ZM}>sJ;kG}2{`>=&MwxK46t`NA5eA3Q=IRX*XYbJKbBPI%VK<7Wis$1*6(REz(C~b&$`bC#S~(%y$<{g z$KKQOPNGq3D>#hn#(EWZ+{;WN~r&j3Y&Oo654|Dct#$uM=Zib&8nf=RY^QFyz z934sBajeYAe2AJs1+5#+Y$o+A%|Zbwd*VE6#-VKKjp9kHjGNc~l=7El+|w$s*Zx@` z@Bl4`1g^Y!Zz zhtGo#8n6*3_ov58?~Wdik#T(OzfLxAa~nqRJZ*hkJsaUsUb$a#zV+J8!YGYCk1LTI z4&F4!8vl90f)cK>**+MzSf;m&PwIPjgN-3nZ7>`YF~r(rwAlJsyA#GFZK}BwjfCCB z9&xk-Ylt!EzCVokzA1)F1~&VGmo0@dJV|rx_J0! 
z`u_}jZq){GsU{@+XmCR9Vy}$WX3g;Gc3=Mp4~bi}Bhs_Hjdb^>t#@j4vGfCLF7d1! zM=|Xp4dUgO(6dsjdO`0dS3N6>-Xeq3-pr+v`3W{no)Dd~3~8Ns&{2G|ngv)D z&o)Y*dtPdNhNhBYqS*T_pqytpPcpawy7!~RZcwP0^=K?`~Z+GF> zr+lZ~*MH_;c5{U#!Cu)Z@!9&kzIHIu>I?FhoPWF{o|#zWZ&sQ#`6cDxJSd7f{+o-_ z#I#V!>I(&&C^HlEQW^;nRwUMcq{5S=%bQNJq^5p-vYgKrdtNiwLu16p#{QB3_3dFl znXf3mNA%TJfnTH{mZw-Ijm#)V>KGp4`zxteF-UVL%4_!2j&(qt#0Vb z5zQHMf3P#U=3%h9K`kr>>@LhL-(r~mLpz|(cx%J!eqwwcnB4Gmk%oeZc94AHr|}N5 zE*b>+Z){tUmMipqQt%<$OWL}P_!m#Beh<(F{yC4C*>R8ZYRHyR_e^QOJ3=Ae6U;=s zQaC3rRv;iCn>M6<8^Tt!;j!8lKk%Z6zb2Ps1g?wjQb&=|>(%0n=gS%2t{b%@sO@&1 zJ99ENOBY6-na=?)PsoJ@jCIvBie0+S@== zM2VrF)sZ6ti#+>>=+7D=+12_YqFo`5*g|n~3Tt0Pbq{TsbuK6j8eIT*Q~fFJ8-@bhSsMm?Kg$CAOF8@E+FM29WDf4D1Zr%acn^|91FCC0^pMg^UR2agxSm1TaHi-BN1=gfznsx0*kwQm83 zKvVz*eZC1nUn*br1(a`X6A}{GzuhIS;^5+<=eW49YdGUMd~n(T!mp1 zy(;iZK9@zNdmR+mv*}!@e&|VF?M!A*Js4!r0+dBXY!Abpptl<2lj*~XUgug4vK-G z!SAg8C|0L*QWeCTYQ`~eI#;n< z0tWW~NjWN%-bjJAQ6xIz_!IOnU~CO+5Setk+l%BWzOSHfAie{o1TXQs^VTJ3$EwrI z4;o`T?NTcugvI5@mgT~IP)OOjJ)FC|-p$H#2?7KK-+#Td2$DxZ^a{O?v_Evp?okg35-tW-$k)C#eZ5w4TnI#9_;Z$?5Q7$#gS!i59{-_#AT&(dFo9<@H8RS)d)Cdf#KQ1WoPpv*p zZKQwbp}E8tHkMZPm*sNH(}7m23HwP2bbuc3w4~fkjFMKAH3QP1&*mg4S6lJ-VgyzR z$^zD~Jv*3h{#H;=AMiC?E8q(98{pM`LgB;E@frsV3hY8=&GUtIb|2F@=) z*{a&zT4f{t@wbQ$^$F-wmFk7hZ`-bo71V_>2(UL6{DN;vz6GVA`iwAs{uiF)$7*h_ zx`68K1Y3Ya=ua#6T&X7Abtouolec#S9ApWkb9nJ>vWosL2vMc!T#p-1HILHavjDL>NxiXytBj}H)xd~Q=oTGNK&PRDohPrhD`Y<4J z^zu`TtaYdwNw`Yigt+-4o)v;t_U5)-!i#6z>3vIpwLj6gyDNGz5<4ylk$M1RbQOEz z{8EovG0Fuj05WBcknG$4XvptS>ijx4@Zdlw&pHnC?As=p+s438$^%U_&M`~;T&qQ; zT9;|aTuS(fS0ChgT;m?r`s`-AbUDIQF)-@Y$6P|WOLMS zwv^{^x+06mWVsRTm8J!L8e|3p<2{vyxR~tH`zuYEJoXFb+MW(d>?bpPru?ZPxkv*9 z(=xuRt;-_w_2=JZsVg^<7lE@+UF*$4o)WW1Bt`kWxXnZ0yV-DhngW`zlU1bbdK*v4W5}^~PZ!z^TH6%}1I@ zD2%BS$x3km^8{P`dmO}t)P#*B;g1rd(lGh&6th}3^GHx+*z@HRwJy|4P%F`?d&wwA(>sseOAiDPCU`}%6Q4Sp&E@1r4E*RZ?qu9#--uj4NPDd6Rmp_!_ot7DcG56o zLZo97eT)A_jc@%pF*H|>!lJhQs3^ekmJz-~xXb8rb8t-Lq4&GlmzsGIw`FhEnx6JM_jz5vYQl zPcSdrp0`;BlTF^0bx1S6O!z#-TG;{pO6@lJ)&-(4buYCc>#4T!?k zGRYFNoNxr0qHy7oi|G?F1S%UEV_lt&4tBS;u775tNG9tY4G@2{u7kw8M zeA9OajnhsYvA1qKW}O8hr?pFp2P;$?YYu%GL<3FW`G5QEExrm-bL-<9&DV{g%d_Dq z=e$RI^jtZ;_ut7xI)W@||Fwwn{r+;7`!yGB7$8PAkWAd16$*^aL6Fj4<+zAG7 zD6v_8gU!=ccT*0%PZ&(3pjfK8esgZBU6J%|=;3Hpw%I>I7xf72b?=AGeVtw_A!z7M zLgdwwun}#040+ojoNCFfy+83In$rBnq;0xAUfg`!TN30IFUREGfT1hMi_6oftOIW^r*mxl&(%!qL`JHPEk3BHe3QPX zv~tH0m|Cc7;i@dZs}SGhz%|IZp;dsL-f*BRzBtf@IH}wudq+sB1S_B7KD5fN3Ge*7 z$*;=N(?1kxzfNGO-4m77 z1ZZsu&;J?$BSw?@Y_cV)1I|(^kIg)TD7_nRSkQXn1V&a8w#P zk}8(rMK9M8F0~u=sv(+|ziowH*qpH#ypXWB`#`13ZV>N@UQqVwr7xQ~i;JGS?@NC< zkrk7JCC#qDdzJ+e4X0O=DrlrNlyMFnV^JxCZXLz0TpE<~3VEhqE9RH(V zL?V&Q>MNx|!t|uu-Z`|AF(tx0fQo?bK(>sto z&E8!zR0bU?>{Z%h=75dfVfb_3%e?rzcAYt~H?p7g&Ci&L)8DTLlOu3mCa7YGP+F^d zmQuSBZVZf$D3rdi`-*K`;7wGY4E{mWK~zjPK`$rYk0`ZHi?#R!&zYV!#JPP`6*=FR zWt7n8PXB2&NG{ix#Sb7U>AM# zaeu}^0b1pIB51?T?jlGE?*QtF7zc?R<^ckZUOdthLW9==W+Vq^gycY;*^Hq)6Rl#z z&oD^*upxZ$AKg%98igr=j4vsWz#8wIBZjUb^dW}B#_~_sD&j@Q^5j4O!Ro`JiriD0 zsxmc|Ymkcf4?_B(J*f|Ux?c%?uhCm6|82#5$UqE)ZvzCsSm>AUvgkKU8M!`3bf1NR z>e3FgCV5D@WKo=l{^8|15r+JePML>bYW6bI)e_EB3U{VSihe5```X--#qJHc`e?Z8A^Lv|UJlBIym;6++vx3hGd`r*ov6#KW+t6PyMSQ)% zhvG^L#u%o(-_DF@P_ly^m7v5m4BKr;Q-Qo*(_`Lq@8J$j)c9pbA> z47!j1B^2AZL%Z_h#gOW|l{Q-kXYx_QR?y4!;I2aUw5^Yxe8^^^`^Dg=F2;j9)%Ubv z_E3m_{G@LU4_Xe`yL-mki;n8xYA+eJM#w%rLz%Yp@T120BKgYGCCiP?u>4zi3X+|v z0raUppV4&TrgN_EANz5+edDoB^vpTQam^B^+e1r?43*T9J)u&tZ?Ig+uF}sjfh`oMC$ego* z22(9SUz1e((^k=adIvev2#XW)*HW2k@fp8GwYs_!I!ouwg6|hc;ztOd=#lcKegeDS z5Ct+$2?uuPKQ4X=4Z766^7BA`cKagzb%{N0L@9R{N=1K7YC5j+78nTi9>HR88G9|! 
[remainder of the binary patch and the diff header of the associative-embedding loss module lost; the surviving hunk resumes mid-function below]
+    keypoint_vis = (
+        (pose_xy[..., 0] >= 0) & (pose_xy[..., 0] < width) &
+        (pose_xy[..., 1] >= 0) & (pose_xy[..., 1] < height)
+    )
+
+    def gen_embed_vals():
+
+        # TODO: there is probably a more efficient tensor indexing approach
+        # to extract these values, but this should work
+
+        for instance_i in range(instance_count):
+            for keypoint_i in range(keypoint_count):
+                if keypoint_vis[instance_i, keypoint_i].item():
+                    curr_x, curr_y = pose_xy[instance_i, keypoint_i, :]
+                    curr_embed = assoc_embed_maps[keypoint_i, curr_y, curr_x]
+                    yield curr_embed
+                else:
+                    # since this point is out of bounds we just yield zero,
+                    # but respect the device and dtype of the map
+                    yield torch.tensor(
+                        0,
+                        device=assoc_embed_maps.device,
+                        dtype=assoc_embed_maps.dtype)
+
+    embed_vals = torch.stack(list(gen_embed_vals()))
+    embed_vals = embed_vals.reshape(instance_count, keypoint_count)
+
+    return embed_vals, keypoint_vis
+
+
+def _instance_grouping_term(embed_vals, keypoint_vis, reference_embeds):
+    """
+    This function implements the first sub-expression from the grouping loss
+    in the associative embedding paper. This sub-expression incentivizes
+    grouping within instances
+    """
+    instance_count = reference_embeds.size(0)
+
+    if instance_count == 0:
+        # there needs to be at least one instance for this term
+        # to contribute to the loss
+        return torch.tensor(
+            0,
+            device=reference_embeds.device,
+            dtype=reference_embeds.dtype)
+
+    else:
+        squared_diffs = (reference_embeds.view(-1, 1) - embed_vals) ** 2
+        squared_diffs[~keypoint_vis] = 0
+
+        return torch.sum(squared_diffs) / instance_count
+
+
+def _ref_embedding_separation_term(reference_embeds, sigma=1):
+
+    """
+    This function implements the second sub-expression from the grouping loss
+    in the associative embedding paper. 
This sub-expression incentivizes
+    separation between reference embedding values
+    """
+
+    instance_count = reference_embeds.size(0)
+
+    if instance_count <= 1:
+        # there needs to be at least two instances for this term
+        # to contribute to the loss since it is based upon a difference
+        # of reference embeddings.
+        return torch.tensor(
+            0,
+            device=reference_embeds.device,
+            dtype=reference_embeds.dtype)
+
+    else:
+
+        # calculate the squared difference between all combinations of the reference embeddings
+        ref_embed_combos = torch.combinations(reference_embeds, 2)
+        num_combos = ref_embed_combos.size(0)
+        ref_embed_combos[:, 1].mul_(-1)
+        squared_diffs = ref_embed_combos.sum(1) ** 2
+
+        # now we have squared diffs and we can calculate the sum of exponents
+        # part of the sub-expression
+        sum_of_exps = torch.sum(torch.exp(-0.5 * sigma ** 2 * squared_diffs))
+
+        # In the paper the denominator is N^2 because they calculate over all
+        # permutations. Since we use all combinations rather than permutations
+        # we use a different denominator to achieve the same result:
+        #   num_combos + N / 2 = N * (N - 1) / 2 + N / 2 = N^2 / 2,
+        # so dividing the once-per-pair sum by it matches dividing the paper's
+        # twice-per-pair permutation sum by N^2
+        return sum_of_exps / (num_combos + instance_count / 2)
+
+
+def balanced_bcelogit_loss(inf_maps, lbl_maps, fairness_quotient):
+
+    assert 0.0 <= fairness_quotient <= 1.0
+
+    total_len = 0
+    for i, dim_len in enumerate(lbl_maps.size()):
+        if i == 0:
+            total_len = dim_len
+        else:
+            total_len *= dim_len
+
+    raw_loss = torch.nn.functional.binary_cross_entropy_with_logits(
+        inf_maps,
+        lbl_maps,
+        reduction='none')
+
+    # split the losses between true labels and false labels
+    true_lbl_losses = raw_loss[lbl_maps == 1]
+    true_lbl_count = len(true_lbl_losses)
+    false_lbl_losses = raw_loss[lbl_maps == 0]
+    false_lbl_count = len(false_lbl_losses)
+
+    assert total_len == true_lbl_count + false_lbl_count
+
+    if fairness_quotient == 0 or true_lbl_count == 0 or false_lbl_count == 0:
+        # We've either been asked to apply zero fairness or
+        # we're missing one of the true/false classes so we can't balance
+        # the loss here
+        return raw_loss.mean()
+    elif fairness_quotient == 1:
+        # return a balanced loss (where true and false cases contribute equally)
+        return (true_lbl_losses.mean() + false_lbl_losses.mean()) / 2
+    else:
+        # we mix balanced and imbalanced losses according to the fairness quotient
+        # TODO: there is a more efficient way to do this. We don't need to sum over
+        # the raw losses twice.
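+        # i.e. loss = f * balanced + (1 - f) * imbalanced, where f is the
+        # fairness quotient; the f == 0 and f == 1 branches above are just
+        # the endpoints of this blend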
+ balanced_loss = (true_lbl_losses.mean() + false_lbl_losses.mean()) / 2 + imbalanced_loss = raw_loss.mean() + + return balanced_loss * fairness_quotient + imbalanced_loss * (1.0 - fairness_quotient) + + + +def weighted_bcelogit_loss(inf_maps, lbl_maps, pos_weight): + + total_len = 0 + for i, dim_len in enumerate(lbl_maps.size()): + if i == 0: + total_len = dim_len + else: + total_len *= dim_len + + raw_loss = torch.nn.functional.binary_cross_entropy_with_logits( + inf_maps, + lbl_maps, + reduction='none') + + # split the losses between true labels and false labels + true_lbl_losses = raw_loss[lbl_maps == 1] + true_lbl_count = len(true_lbl_losses) + false_lbl_losses = raw_loss[lbl_maps == 0] + false_lbl_count = len(false_lbl_losses) + + assert total_len == true_lbl_count + false_lbl_count + + if true_lbl_count == 0 or false_lbl_count == 0: + # we're missing one of the true/false classes so we can't weight + return raw_loss.mean() + else: + # return a weighted loss + numerator = true_lbl_losses.sum() * pos_weight + false_lbl_losses.sum() + denominator = true_lbl_count * pos_weight + false_lbl_count + + return numerator / denominator + + +class PoseEstAssocEmbedLoss(nn.Module): + + """ + Combines an MSE (L2) loss for the pose heatmaps with an associative embedding loss + """ + + def __init__( + self, + pose_heatmap_weight=1.0, + assoc_embedding_weight=1.0, + separation_term_weight=1.0, + grouping_term_weight=1.0, + sigma=1.0, + pose_loss_func=None): + + super(PoseEstAssocEmbedLoss, self).__init__() + + self.pose_heatmap_weight = pose_heatmap_weight + self.assoc_embedding_weight = assoc_embedding_weight + self.separation_term_weight = separation_term_weight + self.grouping_term_weight = grouping_term_weight + self.sigma = sigma + self.pose_loss_func = pose_loss_func + + self.loss_components = dict() + + def forward(self, inference_tensor, truth_labels): + + # we need to combine the embedding loss and the keypoint L2 loss + est_device = inference_tensor.device + + # put all truth labels on the same device as the inference + lbl_joint_heatmaps = truth_labels['joint_heatmaps'].to( + device=est_device, + non_blocking=True) + lbl_pose_instances = truth_labels['pose_instances'].to( + device=est_device, + non_blocking=True) + lbl_instance_count = truth_labels['instance_count'].to( + device=est_device, + non_blocking=True) + pose_keypoint_count = lbl_joint_heatmaps.size(1) + + inf_joint_heatmaps = inference_tensor[:, :pose_keypoint_count, ...] + inf_assoc_embed_map = inference_tensor[:, pose_keypoint_count:, ...] 
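+        # the network output stacks the pose heatmaps in the first
+        # pose_keypoint_count channels and the associative embedding maps in
+        # the remaining channels, so the two loss terms below operate on
+        # disjoint channel slices of the same tensor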
+
+        if self.pose_loss_func is not None:
+            pose_loss = self.pose_loss_func(
+                inf_joint_heatmaps,
+                lbl_joint_heatmaps)
+        else:
+            pose_loss = nn.functional.mse_loss(
+                inf_joint_heatmaps,
+                lbl_joint_heatmaps)
+        embed_loss = self.pose_assoc_embed_loss(
+            inf_assoc_embed_map,
+            lbl_pose_instances,
+            lbl_instance_count)
+        combined_loss = pose_loss * self.pose_heatmap_weight + embed_loss * self.assoc_embedding_weight
+
+        self.loss_components['pose_loss'] = pose_loss.detach()
+        self.loss_components['embed_loss'] = embed_loss.detach()
+        self.loss_components['weighted_pose_loss'] = pose_loss.detach() * self.pose_heatmap_weight
+        self.loss_components['weighted_embed_loss'] = embed_loss.detach() * self.assoc_embedding_weight
+        self.loss_components['combined_loss'] = combined_loss.detach()
+
+        return combined_loss
+
+    def pose_assoc_embed_loss(
+            self,
+            batch_assoc_embed_maps,
+            batch_target_poses_xy,
+            batch_instance_counts):
+
+        batch_size = batch_target_poses_xy.size(0)
+        batch_losses = torch.zeros(
+            batch_size,
+            device=batch_assoc_embed_maps.device,
+            dtype=batch_assoc_embed_maps.dtype)
+
+        for sample_i in range(batch_size):
+
+            # pull out the values corresponding to the current sample in the mini batch
+            instance_count = batch_instance_counts[sample_i]
+            assoc_embed_maps = batch_assoc_embed_maps[sample_i, ...]
+            target_poses_xy = batch_target_poses_xy[sample_i, :instance_count, ...]
+
+            # extract the embedding values at the "truth" XY coordinates for each
+            # instance and calculate the reference embedding
+            embed_vals, keypoint_vis = _get_assoc_embed_values(assoc_embed_maps, target_poses_xy)
+
+            # remove instances with no visible keypoints
+            keypoint_vis_counts = keypoint_vis.sum(1)
+            visible_instances = keypoint_vis_counts > 0
+
+            keypoint_vis_counts = keypoint_vis_counts[visible_instances]
+            embed_vals = embed_vals[visible_instances, :]
+            keypoint_vis = keypoint_vis[visible_instances, :]
+
+            reference_embeds = embed_vals.sum(1) / keypoint_vis.sum(1).to(embed_vals)
+
+            # we take the loss expression defined in the associative embedding paper
+            # and break it down into two sub-expressions: an instance grouping part
+            # and a reference embedding separation part. We also apply a weighting to
+            # each of these loss components which is not in the paper's approach
+            inst_grp_term = _instance_grouping_term(embed_vals, keypoint_vis, reference_embeds)
+            if self.grouping_term_weight != 1:
+                inst_grp_term *= self.grouping_term_weight
+
+            sep_term = _ref_embedding_separation_term(reference_embeds, sigma=self.sigma)
+            if self.separation_term_weight != 1:
+                sep_term *= self.separation_term_weight
+
+            batch_losses[sample_i] = inst_grp_term + sep_term
+
+        return batch_losses.mean()
diff --git a/lib/core/cornerfunction.py b/lib/core/cornerfunction.py
new file mode 100644
index 0000000..39e4948
--- /dev/null
+++ b/lib/core/cornerfunction.py
@@ -0,0 +1,367 @@
+# ------------------------------------------------------------------------------
+# Copyright (c) Microsoft
+# Licensed under the MIT License.
+# Written by Bin Xiao (Bin.Xiao@microsoft.com)
+# ------------------------------------------------------------------------------
+
+# NOTE:
+# This code is based on the function.py code. The name was kept as
+# function.py by Massimo. I (KSS) renamed it so that the original
+# function.py code will work as is, but that means that this code will
+# not work until it is reintegrated back into the codebase
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import time
+import logging
+import os
+
+import matplotlib.pyplot as plt
+
+import numpy as np
+import torch
+
+import cv2
+
+from core.evaluate import accuracy
+from core.inference import get_final_preds
+from utils.transforms import flip_back
+from utils.vis import save_debug_images
+
+import imageio
+
+logger = logging.getLogger(__name__)
+
+
+def train(config, train_loader, model, criterion, optimizer, epoch,
+          output_dir, tb_log_dir, summary_writer, writer_dict=None, dict_writer=None):
+    batch_time = AverageMeter()
+    data_time = AverageMeter()
+    losses = AverageMeter()
+    acc = AverageMeter()
+
+    # switch to train mode
+    model.train()
+
+    end = time.time()
+    for i, (input, target, target_weight, meta) in enumerate(train_loader):
+        # measure data loading time
+        data_time.update(time.time() - end)
+
+        # compute output
+        outputs = model(input)
+
+        # if i == 1:
+        #     plt.imshow(input[0, 0, :, :])
+        #     plt.show()
+        #     plt.imshow(outputs[0, 0, :, :].cpu().detach().numpy())
+        #     plt.show()
+        #     plt.imshow(target[0, :, :])
+        #     plt.show()
+
+        target = target.cuda(non_blocking=True)
+        target_weight = target_weight.cuda(non_blocking=True)
+
+        output_array = outputs.cpu().detach().numpy()
+        target_array = target.cpu().detach().numpy()
+        target_weight_array = target_weight.cpu().detach().numpy()
+
+        # print(output_array.shape)
+        # print(target.shape)
+        # print(target_weight.shape)
+        # print(np.all(target_array==0))
+
+        target_array_im = np.array((target_array[2], target_array[3]), dtype=np.uint8)
+
+        # shift the heatmap down to zero and rescale it into the 0-255 uint8 range
+        target_array_im = target_array_im - target_array_im.min()
+        target_array_im = 255 * (target_array_im / target_array_im.max())
+        target_array_im = target_array_im.astype(np.uint8)
+
+        target_array = target_array.astype('uint8')
+
+        imageio.imwrite(('INF/inf%s.png' % meta['image'][0]), target_array[0, :, :])  # target_array_im[:, :, 0])
+        # ^ USED TO BE [0, 0, :, :]
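+        # (debug only: this writes one ground-truth heatmap PNG per batch
+        # into INF/, which must already exist, and can slow training down)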
+
+        # print(meta['center'])
+
+        (minVal, maxVal, minLoc, maxLoc) = cv2.minMaxLoc(target_array[0, :, :])
+        # print(meta['filename'])
+        # print(maxLoc)
+
+        if isinstance(outputs, list):
+            loss = criterion(outputs[0], target, target_weight)
+            for output in outputs[1:]:
+                loss += criterion(output, target, target_weight)
+        else:
+            output = outputs
+            # print(output.shape, target.shape, target_weight.shape)
+            loss = criterion(output, target, target_weight)
+
+        # loss = criterion(output, target, target_weight)
+
+        # compute gradient and do update step
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+
+        # measure accuracy and record loss
+        losses.update(loss.item(), input.size(0))
+
+        _, avg_acc, cnt, pred = accuracy(output.detach().cpu().numpy(),
+                                         target.detach().cpu().numpy())
+        acc.update(avg_acc, cnt)
+
+        # measure elapsed time
+        elapsed_time = time.time() - end
+        batch_time.update(elapsed_time)
+        end = time.time()
+
+        if i % config.PRINT_FREQ == 0:
+            msg = 'Epoch: [{0}][{1}/{2}]\t' \
+                  'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \
+                  'Speed {speed:.1f} samples/s\t' \
+                  'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \
+                  'Loss {loss.val:.7f} ({loss.avg:.7f})\t' \
+                  'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
+                      epoch, i, len(train_loader), batch_time=batch_time,
+                      speed=input.size(0)/batch_time.val,
+                      data_time=data_time, loss=losses, acc=acc)
+            logger.info(msg)
+
+            if writer_dict:
+                writer = writer_dict['writer']
+                global_steps = writer_dict['train_global_steps']
+                writer.add_scalar('train_loss', losses.val, global_steps)
+                writer.add_scalar('train_acc', acc.val, global_steps)
+                writer_dict['train_global_steps'] = global_steps + 1
+
+            prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), i)
+            # save_debug_images(config, input, meta, target, pred*4, output,
+            #                   prefix)
+
+        if dict_writer:
+            dict_writer.writerow({
+                'Epoch': epoch,
+                'Batch': i,
+                'Loss': loss.item(),
+                'Accuracy': avg_acc,
+                'Batch Time': elapsed_time,
+                'Batch Size': input.size(0),
+            })
+
+
+        summary_writer.add_scalar(
+            'loss/train',
+            loss.item(),
+            epoch * (len(train_loader)) + i)
+
+
+def validate(config, val_loader, val_dataset, model, criterion, output_dir,
+             tb_log_dir, summary_writer, writer_dict=None, dict_writer=None, epoch=None):
+
+    batch_time = AverageMeter()
+    losses = AverageMeter()
+    acc = AverageMeter()
+
+    # switch to evaluate mode
+    model.eval()
+
+    num_samples = len(val_dataset)
+    all_preds = np.zeros(
+        (num_samples, config.MODEL.NUM_JOINTS, 3),
+        dtype=np.float32
+    )
+    all_boxes = np.zeros((num_samples, 6))
+    image_path = []
+    filenames = []
+    imgnums = []
+    idx = 0
+    with torch.no_grad():
+        end = time.time()
+        for i, (input, target, target_weight, meta) in enumerate(val_loader):
+            # compute output
+            print(input.shape)
+            outputs = model(input)
+
+            if isinstance(outputs, list):
+                output = outputs[-1]
+            else:
+                output = outputs
+
+            if config.TEST.FLIP_TEST:
+                # this part is ugly, because PyTorch does not support negative indexing, e.g.
+                # input_flipped = model(input[:, :, :, ::-1])
+                input_flipped = np.flip(input.cpu().numpy(), 3).copy()
+                input_flipped = torch.from_numpy(input_flipped).cuda()
+                outputs_flipped = model(input_flipped)
+
+                if isinstance(outputs_flipped, list):
+                    output_flipped = outputs_flipped[-1]
+                else:
+                    output_flipped = outputs_flipped
+
+                output_flipped = flip_back(output_flipped.cpu().numpy(),
+                                           val_dataset.flip_pairs)
+                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()
+
+                # feature is not aligned, shift flipped heatmap for higher
accuracy + if config.TEST.SHIFT_HEATMAP: + output_flipped[:, :, :, 1:] = \ + output_flipped.clone()[:, :, :, 0:-1] + + output = (output + output_flipped) * 0.5 + + target = target.cuda(non_blocking=True) + target_weight = target_weight.cuda(non_blocking=True) + + loss = criterion(output, target, target_weight) + + num_images = input.size(0) + # measure accuracy and record loss + losses.update(loss.item(), num_images) + _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(), + target.cpu().numpy()) + + acc.update(avg_acc, cnt) + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + c = meta['center'].numpy() + # s = meta['scale'].numpy() + s = 1 + # score = meta['score'].numpy() + score = 1 + print(output.shape) + preds, maxvals = get_final_preds( + config, output.clone().cpu().numpy(), c, s) + + # ERROR + + + all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2] + all_preds[idx:idx + num_images, :, 2:3] = maxvals + # double check this all_boxes parts + all_boxes[idx:idx + num_images, 0:2] = c + all_boxes[idx:idx + num_images, 2:4] = s + # all_boxes[idx:idx + num_images, 4] = np.prod(s*200, 1) + all_boxes[idx:idx + num_images, 5] = score + image_path.extend(meta['image']) + + idx += num_images + + if i % config.PRINT_FREQ == 0: + msg = 'Test: [{0}/{1}]\t' \ + 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \ + 'Loss {loss.val:.7f} ({loss.avg:.7f})\t' \ + 'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format( + i, len(val_loader), batch_time=batch_time, + loss=losses, acc=acc) + logger.info(msg) + + prefix = '{}_{}'.format( + os.path.join(output_dir, 'val'), i + ) + # save_debug_images(config, input, meta, target, pred*4, output, + # prefix) + + try: + name_values, perf_indicator = val_dataset.evaluate( + config, all_preds, output_dir, all_boxes, image_path, + filenames, imgnums + ) + except NotImplementedError: + name_values = [] + perf_indicator = -losses.avg + + model_name = config.MODEL.NAME + if isinstance(name_values, list): + for name_value in name_values: + _print_name_value(name_value, model_name) + else: + _print_name_value(name_values, model_name) + + if writer_dict: + writer = writer_dict['writer'] + global_steps = writer_dict['valid_global_steps'] + writer.add_scalar( + 'valid_loss', + losses.avg, + global_steps + ) + writer.add_scalar( + 'valid_acc', + acc.avg, + global_steps + ) + if isinstance(name_values, list): + for name_value in name_values: + writer.add_scalars( + 'valid', + dict(name_value), + global_steps + ) + else: + writer.add_scalars( + 'valid', + dict(name_values), + global_steps + ) + writer_dict['valid_global_steps'] = global_steps + 1 + + if dict_writer: + dict_writer.writerow({ + 'Epoch': epoch, + 'Loss': losses.avg, + 'Accuracy': acc.avg, + 'Performance Indicator': perf_indicator, + }) + + summary_writer.add_scalar( + 'validate/train', + loss.item(), + epoch * (len(val_loader)) + i) + + return perf_indicator + + +# markdown format output +def _print_name_value(name_value, full_arch_name): + names = name_value.keys() + values = name_value.values() + num_values = len(name_value) + logger.info( + '| Arch ' + + ' '.join(['| {}'.format(name) for name in names]) + + ' |' + ) + logger.info('|---' * (num_values+1) + '|') + + if len(full_arch_name) > 15: + full_arch_name = full_arch_name[:8] + '...' 
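+    # print the row of metric values; names longer than 15 characters were
+    # truncated above so the markdown table stays readable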
+    logger.info(
+        '| ' + full_arch_name + ' ' +
+        ' '.join(['| {:.3f}'.format(value) for value in values]) +
+        ' |'
+    )
+
+
+class AverageMeter(object):
+    """Computes and stores the average and current value"""
+    def __init__(self):
+        self.reset()
+
+    def reset(self):
+        self.val = 0
+        self.avg = 0
+        self.sum = 0
+        self.count = 0
+
+    def update(self, val, n=1):
+        self.val = val
+        self.sum += val * n
+        self.count += n
+        self.avg = self.sum / self.count if self.count != 0 else 0
diff --git a/lib/core/evaluate.py b/lib/core/evaluate.py
new file mode 100644
index 0000000..0822d89
--- /dev/null
+++ b/lib/core/evaluate.py
@@ -0,0 +1,77 @@
+# ------------------------------------------------------------------------------
+# Copyright (c) Microsoft
+# Licensed under the MIT License.
+# Written by Bin Xiao (Bin.Xiao@microsoft.com)
+# ------------------------------------------------------------------------------
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from core.inference import get_max_preds
+
+
+def calc_dists(preds, target, normalize):
+    preds = preds.astype(np.float32)
+    target = target.astype(np.float32)
+    dists = np.zeros((preds.shape[1], preds.shape[0]))
+    for n in range(preds.shape[0]):
+        for c in range(preds.shape[1]):
+            if target[n, c, 0] > 1 and target[n, c, 1] > 1:
+                normed_preds = preds[n, c, :] / normalize[n]
+                normed_targets = target[n, c, :] / normalize[n]
+                dists[c, n] = np.linalg.norm(normed_preds - normed_targets)
+            else:
+                dists[c, n] = -1
+    return dists
+
+
+def dist_acc(dists, thr=0.5):
+    ''' Return percentage below threshold while ignoring values with a -1 '''
+    dist_cal = np.not_equal(dists, -1)
+    num_dist_cal = dist_cal.sum()
+    if num_dist_cal > 0:
+        return np.less(dists[dist_cal], thr).sum() * 1.0 / num_dist_cal
+    else:
+        return -1
+
+
+def accuracy(output, target, hm_type='gaussian', thr=0.5):
+    '''
+    Calculate accuracy according to PCK,
+    but uses ground truth heatmap rather than x,y locations
+    First value to be returned is average accuracy across 'idxs',
+    followed by individual accuracies
+    '''
+    idx = list(range(output.shape[1]))
+    norm = 1.0
+    if hm_type == 'gaussian':
+        pred, _ = get_max_preds(output)
+        # KSS: this line was added to the version for corner detection. I'm not
+        # sure that it's needed and don't have time to investigate now so
+        # I'm commenting it out for now.
+        # target = np.expand_dims(target, 1)
+        target, _ = get_max_preds(target)
+        h = output.shape[2]
+        w = output.shape[3]
+        norm = np.ones((pred.shape[0], 2)) * np.array([h, w]) / 10
+        dists = calc_dists(pred, target, norm)
+
+    acc = np.zeros((len(idx) + 1))
+    avg_acc = 0
+    cnt = 0
+
+    for i in range(len(idx)):
+        acc[i + 1] = dist_acc(dists[idx[i]])
+        if acc[i + 1] >= 0:
+            avg_acc = avg_acc + acc[i + 1]
+            cnt += 1
+
+    avg_acc = avg_acc / cnt if cnt != 0 else 0
+    if cnt != 0:
+        acc[0] = avg_acc
+    return acc, avg_acc, cnt, pred
+
+
diff --git a/lib/core/fecalbolifunc.py b/lib/core/fecalbolifunc.py
new file mode 100644
index 0000000..0cb67d4
--- /dev/null
+++ b/lib/core/fecalbolifunc.py
@@ -0,0 +1,161 @@
+# ------------------------------------------------------------------------------
+# Copyright (c) Microsoft
+# Licensed under the MIT License.
+# Written by Bin Xiao (Bin.Xiao@microsoft.com) +# ------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import time +import logging +import os + +import numpy as np +import torch + +from core.function import AverageMeter + + +logger = logging.getLogger(__name__) + + +def train( + config, + train_loader, + model, + criterion, + optimizer, + dict_writer, + summary_writer, + epoch, + device=None): + + if device is None: + device = next(model.parameters()).device + + batch_time = AverageMeter() + data_time = AverageMeter() + losses = AverageMeter() + + # switch to train mode + model.train() + + end = time.time() + batch_count = len(train_loader) + for i, label_batch in enumerate(train_loader): + + batch_size = label_batch['image'].size(0) + + # measure data loading time + data_time.update(time.time() - end) + + # compute output + img_batch = label_batch['image'].to(device=device, non_blocking=True) + if img_batch.size(1) == 1: + img_batch = torch.cat([img_batch] * 3, dim=1) + output = model(img_batch) + + heatmap_batch = label_batch['heatmap'].to(device=device, non_blocking=True) + loss = criterion(output, heatmap_batch) + # summary_writer.add_scalars( + # 'Loss/train', + # criterion.loss_components.copy(), + # epoch * batch_count + i) + + # compute gradient and do update step + optimizer.zero_grad() + loss.backward() + optimizer.step() + + losses.update(loss.item(), batch_size) + + # measure elapsed time + elapsed_time = time.time() - end + batch_time.update(elapsed_time) + end = time.time() + + if i % config.PRINT_FREQ == 0: + msg = 'Epoch: [{0}][{1}/{2}]\t' \ + 'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \ + 'Speed {speed:.1f} samples/s\t' \ + 'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \ + 'Loss {loss.val:.7f} ({loss.avg:.7f})'.format( + epoch, i, len(train_loader), batch_time=batch_time, + speed=batch_size/batch_time.val, + data_time=data_time, loss=losses) + logger.info(msg) + + dict_writer.writerow({ + 'Epoch': epoch, + 'Batch': i, + 'Loss': loss.item(), + 'Batch Time': elapsed_time, + 'Batch Size': batch_size, + }) + summary_writer.add_scalar( + 'Loss/train', + loss.item(), + epoch * batch_count + i) + + +def validate( + config, + val_loader, + model, + criterion, + dict_writer, + summary_writer, + epoch, + device=None): + + if device is None: + device = next(model.parameters()).device + + batch_time = AverageMeter() + losses = AverageMeter() + + # switch to evaluate mode + model.eval() + + with torch.no_grad(): + end = time.time() + for i, label_batch in enumerate(val_loader): + + batch_size = label_batch['image'].size(0) + img_batch = label_batch['image'].to(device=device, non_blocking=True) + if img_batch.size(1) == 1: + img_batch = torch.cat([img_batch] * 3, dim=1) + output = model(img_batch) + + heatmap_batch = label_batch['heatmap'].to(device=device, non_blocking=True) + loss = criterion(output, heatmap_batch) + + # measure accuracy and record loss + losses.update(loss.item(), batch_size) + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % config.PRINT_FREQ == 0: + msg = 'Test: [{0}/{1}]\t' \ + 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \ + 'Loss {loss.val:.7f} ({loss.avg:.7f})'.format( + i, len(val_loader), batch_time=batch_time, loss=losses) + logger.info(msg) + + perf_indicator = -losses.avg + + dict_writer.writerow({ + 'Epoch': epoch, + 'Loss': losses.avg, + 
'Performance Indicator': perf_indicator,
+    })
+    summary_writer.add_scalar(
+        'Loss/validation',
+        losses.avg,
+        epoch)
+
+    return perf_indicator
diff --git a/lib/core/focalloss.py b/lib/core/focalloss.py
new file mode 100644
index 0000000..24ed82f
--- /dev/null
+++ b/lib/core/focalloss.py
@@ -0,0 +1,65 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as nnfunc
+
+# An implementation of focal loss (Focal Loss for Dense Object Detection; Lin et al.)
+# Note that in their paper the authors state that initialization of the final layer
+# is critical. They also find that using a feature pyramid network (FPN) was important
+# to the success of their approach
+
+def ce_focal_loss(input, target, weight=None, gamma=2.0, reduction='elementwise_mean'):
+
+    """ a simple multinomial implementation of focal loss """
+
+    # standard cross entropy, kept per-element so it can be rescaled below
+    cross_entropy_loss = nnfunc.cross_entropy(input, target, weight=weight, reduction='none')
+
+    # probability the model assigns to the true class of each element
+    probs = nnfunc.softmax(input, 1)
+    true_probs = torch.squeeze(
+        torch.gather(probs, 1, torch.unsqueeze(target, 1)),
+        1)
+
+    # focal modulation: down-weight well-classified elements,
+    # i.e. FL = (1 - p_true) ** gamma * CE
+    loss = (1 - true_probs) ** gamma * cross_entropy_loss
+
+    if reduction == 'none':
+        return loss
+    elif reduction == 'sum':
+        return loss.sum()
+    elif reduction == 'elementwise_mean':
+        return loss.mean()
+    else:
+        raise ValueError(
+            'bad reduction value. Valid values are:'
+            ' "none", "sum" or "elementwise_mean"')
+
+
+class CEFocalLoss(nn.Module):
+
+    def __init__(self, weight=None, gamma=2.0, reduction='elementwise_mean'):
+        # nn.Module must be initialized before register_buffer can be called
+        super(CEFocalLoss, self).__init__()
+        self.register_buffer('weight', weight)
+        self.gamma = gamma
+        self.reduction = reduction
+
+    def forward(self, input, target):
+        return ce_focal_loss(
+            input, target,
+            weight=self.weight, gamma=self.gamma, reduction=self.reduction)
+
+
+def bce_focal_loss(input, target, pos_weight=None, gamma=2.0, reduction='elementwise_mean'):
+
+    bce_loss = nnfunc.binary_cross_entropy_with_logits(input, target, pos_weight=pos_weight, reduction='none')
+    probs = torch.sigmoid(input)
+
+    # |target - p| plays the role of (1 - p_true) in the binary case
+    abs_prob_diff = torch.abs(target - probs)
+    loss = abs_prob_diff ** gamma * bce_loss
+
+    if reduction == 'none':
+        return loss
+    elif reduction == 'sum':
+        return loss.sum()
+    elif reduction == 'elementwise_mean':
+        return loss.mean()
+    else:
+        raise ValueError(
+            'bad reduction value. Valid values are:'
+            ' "none", "sum" or "elementwise_mean"')
diff --git a/lib/core/function.py b/lib/core/function.py
new file mode 100755
index 0000000..a81ae28
--- /dev/null
+++ b/lib/core/function.py
@@ -0,0 +1,303 @@
+# ------------------------------------------------------------------------------
+# Copyright (c) Microsoft
+# Licensed under the MIT License.
+# Written by Bin Xiao (Bin.Xiao@microsoft.com) +# ------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import time +import logging +import os + +import numpy as np +import torch + +from core.evaluate import accuracy +from core.inference import get_final_preds +from utils.transforms import flip_back +from utils.vis import save_debug_images + + +logger = logging.getLogger(__name__) + + +def train(config, train_loader, model, criterion, optimizer, epoch, + output_dir, tb_log_dir, writer_dict=None, dict_writer=None): + batch_time = AverageMeter() + data_time = AverageMeter() + losses = AverageMeter() + acc = AverageMeter() + + # switch to train mode + model.train() + + end = time.time() + for i, (input, target, target_weight, meta) in enumerate(train_loader): + # measure data loading time + data_time.update(time.time() - end) + + # compute output + outputs = model(input) + + target = target.cuda(non_blocking=True) + target_weight = target_weight.cuda(non_blocking=True) + + if isinstance(outputs, list): + loss = criterion(outputs[0], target, target_weight) + for output in outputs[1:]: + loss += criterion(output, target, target_weight) + else: + output = outputs + loss = criterion(output, target, target_weight) + + # loss = criterion(output, target, target_weight) + + # compute gradient and do update step + optimizer.zero_grad() + loss.backward() + optimizer.step() + + # measure accuracy and record loss + losses.update(loss.item(), input.size(0)) + + _, avg_acc, cnt, pred = accuracy(output.detach().cpu().numpy(), + target.detach().cpu().numpy()) + acc.update(avg_acc, cnt) + + # measure elapsed time + elapsed_time = time.time() - end + batch_time.update(elapsed_time) + end = time.time() + + if i % config.PRINT_FREQ == 0: + msg = 'Epoch: [{0}][{1}/{2}]\t' \ + 'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \ + 'Speed {speed:.1f} samples/s\t' \ + 'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \ + 'Loss {loss.val:.7f} ({loss.avg:.7f})\t' \ + 'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format( + epoch, i, len(train_loader), batch_time=batch_time, + speed=input.size(0)/batch_time.val, + data_time=data_time, loss=losses, acc=acc) + logger.info(msg) + + if writer_dict: + writer = writer_dict['writer'] + global_steps = writer_dict['train_global_steps'] + writer.add_scalar('train_loss', losses.val, global_steps) + writer.add_scalar('train_acc', acc.val, global_steps) + writer_dict['train_global_steps'] = global_steps + 1 + + prefix = '{}_{}'.format(os.path.join(output_dir, 'train'), i) + save_debug_images(config, input, meta, target, pred*4, output, + prefix) + + if dict_writer: + dict_writer.writerow({ + 'Epoch': epoch, + 'Batch': i, + 'Loss': loss.item(), + 'Accuracy': avg_acc, + 'Batch Time': elapsed_time, + 'Batch Size': input.size(0), + }) + + +def validate(config, val_loader, val_dataset, model, criterion, output_dir, + tb_log_dir, writer_dict=None, dict_writer=None, epoch=None): + batch_time = AverageMeter() + losses = AverageMeter() + acc = AverageMeter() + + # switch to evaluate mode + model.eval() + + num_samples = len(val_dataset) + all_preds = np.zeros( + (num_samples, config.MODEL.NUM_JOINTS, 3), + dtype=np.float32 + ) + all_boxes = np.zeros((num_samples, 6)) + image_path = [] + filenames = [] + imgnums = [] + idx = 0 + with torch.no_grad(): + end = time.time() + for i, (input, target, target_weight, meta) in enumerate(val_loader): + # 
compute output
+            outputs = model(input)
+            if isinstance(outputs, list):
+                output = outputs[-1]
+            else:
+                output = outputs
+
+            if config.TEST.FLIP_TEST:
+                # this part is ugly, because PyTorch does not support negative indexing, e.g.
+                # input_flipped = model(input[:, :, :, ::-1])
+                input_flipped = np.flip(input.cpu().numpy(), 3).copy()
+                input_flipped = torch.from_numpy(input_flipped).cuda()
+                outputs_flipped = model(input_flipped)
+
+                if isinstance(outputs_flipped, list):
+                    output_flipped = outputs_flipped[-1]
+                else:
+                    output_flipped = outputs_flipped
+
+                output_flipped = flip_back(output_flipped.cpu().numpy(),
+                                           val_dataset.flip_pairs)
+                output_flipped = torch.from_numpy(output_flipped.copy()).cuda()
+
+
+                # feature is not aligned, shift flipped heatmap for higher accuracy
+                if config.TEST.SHIFT_HEATMAP:
+                    output_flipped[:, :, :, 1:] = \
+                        output_flipped.clone()[:, :, :, 0:-1]
+
+                output = (output + output_flipped) * 0.5
+
+            target = target.cuda(non_blocking=True)
+            target_weight = target_weight.cuda(non_blocking=True)
+
+            loss = criterion(output, target, target_weight)
+
+            num_images = input.size(0)
+            # measure accuracy and record loss
+            losses.update(loss.item(), num_images)
+            _, avg_acc, cnt, pred = accuracy(output.cpu().numpy(),
+                                             target.cpu().numpy())
+
+            acc.update(avg_acc, cnt)
+
+            # measure elapsed time
+            batch_time.update(time.time() - end)
+            end = time.time()
+
+            c = meta['center'].numpy()
+            s = meta['scale'].numpy()
+            score = meta['score'].numpy()
+
+            preds, maxvals = get_final_preds(
+                config, output.clone().cpu().numpy(), c, s)
+
+            all_preds[idx:idx + num_images, :, 0:2] = preds[:, :, 0:2]
+            all_preds[idx:idx + num_images, :, 2:3] = maxvals
+            # double check this all_boxes parts
+            all_boxes[idx:idx + num_images, 0:2] = c[:, 0:2]
+            all_boxes[idx:idx + num_images, 2:4] = s[:, 0:2]
+            all_boxes[idx:idx + num_images, 4] = np.prod(s*200, 1)
+            all_boxes[idx:idx + num_images, 5] = score
+            image_path.extend(meta['image'])
+
+            idx += num_images
+
+            if i % config.PRINT_FREQ == 0:
+                msg = 'Test: [{0}/{1}]\t' \
+                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \
+                      'Loss {loss.val:.7f} ({loss.avg:.7f})\t' \
+                      'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format(
+                          i, len(val_loader), batch_time=batch_time,
+                          loss=losses, acc=acc)
+                logger.info(msg)
+
+                prefix = '{}_{}'.format(
+                    os.path.join(output_dir, 'val'), i
+                )
+                save_debug_images(config, input, meta, target, pred*4, output,
+                                  prefix)
+
+    try:
+        name_values, perf_indicator = val_dataset.evaluate(
+            config, all_preds, output_dir, all_boxes, image_path,
+            filenames, imgnums
+        )
+    except NotImplementedError:
+        name_values = []
+        perf_indicator = -losses.avg
+
+    model_name = config.MODEL.NAME
+    if isinstance(name_values, list):
+        for name_value in name_values:
+            _print_name_value(name_value, model_name)
+    else:
+        _print_name_value(name_values, model_name)
+
+    if writer_dict:
+        writer = writer_dict['writer']
+        global_steps = writer_dict['valid_global_steps']
+        writer.add_scalar(
+            'valid_loss',
+            losses.avg,
+            global_steps
+        )
+        writer.add_scalar(
+            'valid_acc',
+            acc.avg,
+            global_steps
+        )
+        if isinstance(name_values, list):
+            for name_value in name_values:
+                writer.add_scalars(
+                    'valid',
+                    dict(name_value),
+                    global_steps
+                )
+        else:
+            writer.add_scalars(
+                'valid',
+                dict(name_values),
+                global_steps
+            )
+        writer_dict['valid_global_steps'] = global_steps + 1
+
+    if dict_writer:
+        dict_writer.writerow({
+            'Epoch': epoch,
+            'Loss': losses.avg,
+            'Accuracy': acc.avg,
+            'Performance Indicator': perf_indicator,
+        })
+
+    return perf_indicator
+
+
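+# For reference, a minimal sketch (not part of this file) of how the train()
+# and validate() functions above are typically driven once per epoch by a
+# training script; cfg, the loaders, model, criterion, optimizer and
+# lr_scheduler are assumed to be constructed elsewhere:
+#
+#     best_perf = None
+#     for epoch in range(cfg.TRAIN.BEGIN_EPOCH, cfg.TRAIN.END_EPOCH):
+#         train(cfg, train_loader, model, criterion, optimizer, epoch,
+#               final_output_dir, tb_log_dir, writer_dict)
+#         perf = validate(cfg, valid_loader, valid_dataset, model, criterion,
+#                         final_output_dir, tb_log_dir, writer_dict,
+#                         epoch=epoch)
+#         lr_scheduler.step()
+#         if best_perf is None or perf > best_perf:
+#             # when a dataset does not implement evaluate(), validate()
+#             # falls back to returning -losses.avg, so "higher is better"
+#             # still holds
+#             best_perf = perf
+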
+# markdown format output +def _print_name_value(name_value, full_arch_name): + names = name_value.keys() + values = name_value.values() + num_values = len(name_value) + logger.info( + '| Arch ' + + ' '.join(['| {}'.format(name) for name in names]) + + ' |' + ) + logger.info('|---' * (num_values+1) + '|') + + if len(full_arch_name) > 15: + full_arch_name = full_arch_name[:8] + '...' + logger.info( + '| ' + full_arch_name + ' ' + + ' '.join(['| {:.3f}'.format(value) for value in values]) + + ' |' + ) + + +class AverageMeter(object): + """Computes and stores the average and current value""" + def __init__(self): + self.reset() + + def reset(self): + self.val = 0 + self.avg = 0 + self.sum = 0 + self.count = 0 + + def update(self, val, n=1): + self.val = val + self.sum += val * n + self.count += n + self.avg = self.sum / self.count if self.count != 0 else 0 diff --git a/lib/core/inference.py b/lib/core/inference.py new file mode 100644 index 0000000..ba2bac8 --- /dev/null +++ b/lib/core/inference.py @@ -0,0 +1,79 @@ +# ------------------------------------------------------------------------------ +# Copyright (c) Microsoft +# Licensed under the MIT License. +# Written by Bin Xiao (Bin.Xiao@microsoft.com) +# ------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import numpy as np + +from utils.transforms import transform_preds + + +def get_max_preds(batch_heatmaps): + ''' + get predictions from score maps + heatmaps: numpy.ndarray([batch_size, num_joints, height, width]) + ''' + assert isinstance(batch_heatmaps, np.ndarray), \ + 'batch_heatmaps should be numpy.ndarray' + assert batch_heatmaps.ndim == 4, 'batch_images should be 4-ndim' + + batch_size = batch_heatmaps.shape[0] + num_joints = batch_heatmaps.shape[1] + width = batch_heatmaps.shape[3] + heatmaps_reshaped = batch_heatmaps.reshape((batch_size, num_joints, -1)) + idx = np.argmax(heatmaps_reshaped, 2) + maxvals = np.amax(heatmaps_reshaped, 2) + + maxvals = maxvals.reshape((batch_size, num_joints, 1)) + idx = idx.reshape((batch_size, num_joints, 1)) + + preds = np.tile(idx, (1, 1, 2)).astype(np.float32) + + preds[:, :, 0] = (preds[:, :, 0]) % width + preds[:, :, 1] = np.floor((preds[:, :, 1]) / width) + + pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2)) + pred_mask = pred_mask.astype(np.float32) + + preds *= pred_mask + return preds, maxvals + + +def get_final_preds(config, batch_heatmaps, center, scale): + coords, maxvals = get_max_preds(batch_heatmaps) + + heatmap_height = batch_heatmaps.shape[2] + heatmap_width = batch_heatmaps.shape[3] + + # post-processing + if config.TEST.POST_PROCESS: + for n in range(coords.shape[0]): + for p in range(coords.shape[1]): + hm = batch_heatmaps[n][p] + px = int(math.floor(coords[n][p][0] + 0.5)) + py = int(math.floor(coords[n][p][1] + 0.5)) + if 1 < px < heatmap_width-1 and 1 < py < heatmap_height-1: + diff = np.array( + [ + hm[py][px+1] - hm[py][px-1], + hm[py+1][px]-hm[py-1][px] + ] + ) + coords[n][p] += np.sign(diff) * .25 + + preds = coords.copy() + + # Transform back + for i in range(coords.shape[0]): + preds[i] = transform_preds( + coords[i], center[i], scale[i], [heatmap_width, heatmap_height] + ) + + return preds, maxvals diff --git a/lib/core/loss.py b/lib/core/loss.py new file mode 100644 index 0000000..ad3850e --- /dev/null +++ b/lib/core/loss.py @@ -0,0 +1,88 @@ +# 
------------------------------------------------------------------------------
+# Copyright (c) Microsoft
+# Licensed under the MIT License.
+# Written by Bin Xiao (Bin.Xiao@microsoft.com)
+# ------------------------------------------------------------------------------
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import torch
+import torch.nn as nn
+
+
+class JointsMSELoss(nn.Module):
+    def __init__(self, use_target_weight):
+        super(JointsMSELoss, self).__init__()
+        self.criterion = nn.MSELoss(reduction='mean')
+        self.use_target_weight = use_target_weight
+
+    def forward(self, output, target, target_weight):
+        batch_size = output.size(0)
+        num_joints = output.size(1)
+        heatmaps_pred = output.reshape((batch_size, num_joints, -1)).split(1, 1)
+        heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1)
+        # KSS: this line was added to the version used for corner detection.
+        # I'm not sure that it's needed and don't have time to investigate
+        # now, so I'm commenting it out for now.
+        #target_weight = target_weight.reshape((batch_size, num_joints, -1))
+        loss = 0
+
+        for idx in range(num_joints):
+            heatmap_pred = heatmaps_pred[idx].squeeze()
+            heatmap_gt = heatmaps_gt[idx].squeeze()
+            if self.use_target_weight:
+                loss += 0.5 * self.criterion(
+                    heatmap_pred.mul(target_weight[:, idx]),
+                    heatmap_gt.mul(target_weight[:, idx])
+                )
+            else:
+                loss += 0.5 * self.criterion(heatmap_pred, heatmap_gt)
+
+        return loss / num_joints
+
+
+class JointsOHKMMSELoss(nn.Module):
+    def __init__(self, use_target_weight, topk=8):
+        super(JointsOHKMMSELoss, self).__init__()
+        self.criterion = nn.MSELoss(reduction='none')
+        self.use_target_weight = use_target_weight
+        self.topk = topk
+
+    def ohkm(self, loss):
+        ohkm_loss = 0.
+ for i in range(loss.size()[0]): + sub_loss = loss[i] + topk_val, topk_idx = torch.topk( + sub_loss, k=self.topk, dim=0, sorted=False + ) + tmp_loss = torch.gather(sub_loss, 0, topk_idx) + ohkm_loss += torch.sum(tmp_loss) / self.topk + ohkm_loss /= loss.size()[0] + return ohkm_loss + + def forward(self, output, target, target_weight): + batch_size = output.size(0) + num_joints = output.size(1) + heatmaps_pred = output.reshape((batch_size, num_joints, -1)).split(1, 1) + heatmaps_gt = target.reshape((batch_size, num_joints, -1)).split(1, 1) + + loss = [] + for idx in range(num_joints): + heatmap_pred = heatmaps_pred[idx].squeeze() + heatmap_gt = heatmaps_gt[idx].squeeze() + if self.use_target_weight: + loss.append(0.5 * self.criterion( + heatmap_pred.mul(target_weight[:, idx]), + heatmap_gt.mul(target_weight[:, idx]) + )) + else: + loss.append( + 0.5 * self.criterion(heatmap_pred, heatmap_gt) + ) + + loss = [l.mean(dim=1).unsqueeze(dim=1) for l in loss] + loss = torch.cat(loss, dim=1) + + return self.ohkm(loss) diff --git a/lib/core/segfunction.py b/lib/core/segfunction.py new file mode 100644 index 0000000..4013473 --- /dev/null +++ b/lib/core/segfunction.py @@ -0,0 +1,124 @@ +# similar to function.py but modified for segmentation + +import time +import logging + +import torch +import torch.nn.functional as torchf + +from core.function import AverageMeter + +logger = logging.getLogger(__name__) + + +def train(config, train_loader, model, criterion, optimizer, epoch, dict_writer=None): + + batch_time = AverageMeter() + data_time = AverageMeter() + losses = AverageMeter() + + # switch to train mode + model.train() + + end = time.time() + for i, (img_batch, target_batch) in enumerate(train_loader): + + # turn grayscale into 3 channels for model + img_batch = img_batch.cuda() + img_batch = torch.cat([img_batch] * 3, dim=1) + + # measure data loading time + data_time.update(time.time() - end) + + # compute output + output_batch = model(img_batch) + + target_batch = target_batch.cuda(non_blocking=True) + loss = criterion(output_batch, target_batch) + + # compute gradient and do update step + optimizer.zero_grad() + loss.backward() + optimizer.step() + + # record loss + losses.update(loss.item(), img_batch.size(0)) + + # measure elapsed time + elapsed_time = time.time() - end + batch_time.update(elapsed_time) + end = time.time() + + if i % config.PRINT_FREQ == 0: + msg = 'Epoch: [{0}][{1}/{2}]\t' \ + 'Time {batch_time.val:.3f}s ({batch_time.avg:.3f}s)\t' \ + 'Speed {speed:.1f} samples/s\t' \ + 'Data {data_time.val:.3f}s ({data_time.avg:.3f}s)\t' \ + 'Loss {loss.val:.7f} ({loss.avg:.7f})'.format( + epoch, i, len(train_loader), batch_time=batch_time, + speed=img_batch.size(0)/batch_time.val, + data_time=data_time, loss=losses) + logger.info(msg) + + if dict_writer: + dict_writer.writerow({ + 'Epoch': epoch, + 'Batch': i, + 'Loss': loss.item(), + 'Batch Time': elapsed_time, + 'Batch Size': img_batch.size(0), + }) + + +def validate(config, val_loader, model, criterion, dict_writer=None, epoch=None): + batch_time = AverageMeter() + losses = AverageMeter() + acc = AverageMeter() + + # switch to evaluate mode + model.eval() + + with torch.no_grad(): + end = time.time() + for i, (img_batch, target_batch) in enumerate(val_loader): + + # turn grayscale into 3 channels for model + img_batch = img_batch.cuda() + img_batch = torch.cat([img_batch] * 3, dim=1) + + num_images = img_batch.size(0) + + # compute output + output_batch = model(img_batch) + + target_batch = target_batch.cuda(non_blocking=True) + loss 
= criterion(output_batch, target_batch) + + # measure accuracy and record loss + losses.update(loss.item(), num_images) + + output_mask = output_batch >= 0.0 + avg_acc = 1.0 - torch.abs(target_batch - output_mask.float()).mean() + acc.update(avg_acc.item(), num_images) + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + if i % config.PRINT_FREQ == 0: + msg = 'Test: [{0}/{1}]\t' \ + 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' \ + 'Loss {loss.val:.7f} ({loss.avg:.7f})\t' \ + 'Accuracy {acc.val:.3f} ({acc.avg:.3f})'.format( + i, len(val_loader), batch_time=batch_time, + loss=losses, acc=acc) + logger.info(msg) + + if dict_writer: + dict_writer.writerow({ + 'Epoch': epoch, + 'Loss': losses.avg, + 'Accuracy': acc.avg, + }) + + return acc.avg diff --git a/lib/dataset/JointsDataset.py b/lib/dataset/JointsDataset.py new file mode 100755 index 0000000..5a8cc3a --- /dev/null +++ b/lib/dataset/JointsDataset.py @@ -0,0 +1,289 @@ +# ------------------------------------------------------------------------------ +# Copyright (c) Microsoft +# Licensed under the MIT License. +# Written by Bin Xiao (Bin.Xiao@microsoft.com) +# ------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import copy +import logging +import random + +import cv2 +import numpy as np +import torch +from torch.utils.data import Dataset + +from utils.transforms import get_affine_transform +from utils.transforms import affine_transform +from utils.transforms import fliplr_joints + + +logger = logging.getLogger(__name__) + + +class JointsDataset(Dataset): + def __init__(self, cfg, root, image_set, is_train, transform=None): + self.num_joints = 0 + self.pixel_std = 200 + self.flip_pairs = [] + self.parent_ids = [] + + self.is_train = is_train + self.root = root + self.image_set = image_set + + self.output_path = cfg.OUTPUT_DIR + self.data_format = cfg.DATASET.DATA_FORMAT + + self.scale_factor = cfg.DATASET.SCALE_FACTOR + self.rotation_factor = cfg.DATASET.ROT_FACTOR + self.flip = cfg.DATASET.FLIP + self.num_joints_half_body = cfg.DATASET.NUM_JOINTS_HALF_BODY + self.prob_half_body = cfg.DATASET.PROB_HALF_BODY + self.color_rgb = cfg.DATASET.COLOR_RGB + + self.target_type = cfg.MODEL.TARGET_TYPE + self.image_size = np.array(cfg.MODEL.IMAGE_SIZE) + self.heatmap_size = np.array(cfg.MODEL.HEATMAP_SIZE) + self.sigma = cfg.MODEL.SIGMA + self.use_different_joints_weight = cfg.LOSS.USE_DIFFERENT_JOINTS_WEIGHT + self.joints_weight = 1 + + self.transform = transform + self.db = [] + + def _get_db(self): + raise NotImplementedError + + def evaluate(self, cfg, preds, output_dir, *args, **kwargs): + raise NotImplementedError + + def half_body_transform(self, joints, joints_vis): + upper_joints = [] + lower_joints = [] + for joint_id in range(self.num_joints): + if joints_vis[joint_id][0] > 0: + if joint_id in self.upper_body_ids: + upper_joints.append(joints[joint_id]) + else: + lower_joints.append(joints[joint_id]) + + if np.random.randn() < 0.5 and len(upper_joints) > 2: + selected_joints = upper_joints + else: + selected_joints = lower_joints \ + if len(lower_joints) > 2 else upper_joints + + if len(selected_joints) < 2: + return None, None + + selected_joints = np.array(selected_joints, dtype=np.float32) + center = selected_joints.mean(axis=0)[:2] + + left_top = np.amin(selected_joints, axis=0) + right_bottom = np.amax(selected_joints, axis=0) + + w = right_bottom[0] 
- left_top[0] + h = right_bottom[1] - left_top[1] + + if w > self.aspect_ratio * h: + h = w * 1.0 / self.aspect_ratio + elif w < self.aspect_ratio * h: + w = h * self.aspect_ratio + + scale = np.array( + [ + w * 1.0 / self.pixel_std, + h * 1.0 / self.pixel_std + ], + dtype=np.float32 + ) + + scale = scale * 1.5 + + return center, scale + + def __len__(self,): + return len(self.db) + + def __getitem__(self, idx): + db_rec = copy.deepcopy(self.db[idx]) + + image_file = db_rec['image'] + filename = db_rec['filename'] if 'filename' in db_rec else '' + imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else '' + + if self.data_format == 'zip': + from utils import zipreader + data_numpy = zipreader.imread( + image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION + ) + else: + data_numpy = cv2.imread( + image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION + ) + + if self.color_rgb: + data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB) + + if data_numpy is None: + logger.error('=> fail to read {}'.format(image_file)) + raise ValueError('Fail to read {}'.format(image_file)) + + joints = db_rec['joints_3d'] + joints_vis = db_rec['joints_3d_vis'] + + c = db_rec['center'] + s = db_rec['scale'] + score = db_rec['score'] if 'score' in db_rec else 1 + r = 0 + + if self.is_train: + if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body + and np.random.rand() < self.prob_half_body): + c_half_body, s_half_body = self.half_body_transform( + joints, joints_vis + ) + + if c_half_body is not None and s_half_body is not None: + c, s = c_half_body, s_half_body + + sf = self.scale_factor + rf = self.rotation_factor + s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf) + r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \ + if random.random() <= 0.6 else 0 + + if self.flip and random.random() <= 0.5: + data_numpy = data_numpy[:, ::-1, :] + joints, joints_vis = fliplr_joints( + joints, joints_vis, data_numpy.shape[1], self.flip_pairs) + c[0] = data_numpy.shape[1] - c[0] - 1 + + trans = get_affine_transform(c, s, r, self.image_size) + input = cv2.warpAffine( + data_numpy, + trans, + (int(self.image_size[0]), int(self.image_size[1])), + flags=cv2.INTER_LINEAR) + + if self.transform: + input = self.transform(input) + + for i in range(self.num_joints): + if joints_vis[i, 0] > 0.0: + joints[i, 0:2] = affine_transform(joints[i, 0:2], trans) + + target, target_weight = self.generate_target(joints, joints_vis) + + target = torch.from_numpy(target) + target_weight = torch.from_numpy(target_weight) + + meta = { + 'image': image_file, + 'filename': filename, + 'imgnum': imgnum, + 'joints': joints, + 'joints_vis': joints_vis, + 'center': c, + 'scale': s, + 'rotation': r, + 'score': score + } + + return input, target, target_weight, meta + + def select_data(self, db): + db_selected = [] + for rec in db: + num_vis = 0 + joints_x = 0.0 + joints_y = 0.0 + for joint, joint_vis in zip( + rec['joints_3d'], rec['joints_3d_vis']): + if joint_vis[0] <= 0: + continue + num_vis += 1 + + joints_x += joint[0] + joints_y += joint[1] + if num_vis == 0: + continue + + joints_x, joints_y = joints_x / num_vis, joints_y / num_vis + + area = rec['scale'][0] * rec['scale'][1] * (self.pixel_std**2) + joints_center = np.array([joints_x, joints_y]) + bbox_center = np.array(rec['center']) + diff_norm2 = np.linalg.norm((joints_center-bbox_center), 2) + ks = np.exp(-1.0*(diff_norm2**2) / ((0.2)**2*2.0*area)) + + metric = (0.2 / 16) * num_vis + 0.45 - 0.2 / 16 + if ks > metric: + db_selected.append(rec) + + logger.info('=> num db: 
{}'.format(len(db))) + logger.info('=> num selected db: {}'.format(len(db_selected))) + return db_selected + + def generate_target(self, joints, joints_vis): + ''' + :param joints: [num_joints, 3] + :param joints_vis: [num_joints, 3] + :return: target, target_weight(1: visible, 0: invisible) + ''' + target_weight = np.ones((self.num_joints, 1), dtype=np.float32) + target_weight[:, 0] = joints_vis[:, 0] + + assert self.target_type == 'gaussian', \ + 'Only support gaussian map now!' + + if self.target_type == 'gaussian': + target = np.zeros((self.num_joints, + self.heatmap_size[1], + self.heatmap_size[0]), + dtype=np.float32) + + tmp_size = self.sigma * 3 + + for joint_id in range(self.num_joints): + feat_stride = self.image_size / self.heatmap_size + mu_x = int(joints[joint_id][0] / feat_stride[0] + 0.5) + mu_y = int(joints[joint_id][1] / feat_stride[1] + 0.5) + # Check that any part of the gaussian is in-bounds + ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] + br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] + if ul[0] >= self.heatmap_size[0] or ul[1] >= self.heatmap_size[1] \ + or br[0] < 0 or br[1] < 0: + # If not, just return the image as is + target_weight[joint_id] = 0 + continue + + # # Generate gaussian + size = 2 * tmp_size + 1 + x = np.arange(0, size, 1, np.float32) + y = x[:, np.newaxis] + x0 = y0 = size // 2 + # The gaussian is not normalized, we want the center value to equal 1 + g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * self.sigma ** 2)) + + # Usable gaussian range + g_x = max(0, -ul[0]), min(br[0], self.heatmap_size[0]) - ul[0] + g_y = max(0, -ul[1]), min(br[1], self.heatmap_size[1]) - ul[1] + # Image range + img_x = max(0, ul[0]), min(br[0], self.heatmap_size[0]) + img_y = max(0, ul[1]), min(br[1], self.heatmap_size[1]) + + v = target_weight[joint_id] + if v > 0.5: + target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \ + g[g_y[0]:g_y[1], g_x[0]:g_x[1]] + + if self.use_different_joints_weight: + target_weight = np.multiply(target_weight, self.joints_weight) + + return target, target_weight diff --git a/lib/dataset/OpenFieldObjDataset.py b/lib/dataset/OpenFieldObjDataset.py new file mode 100644 index 0000000..c420ee0 --- /dev/null +++ b/lib/dataset/OpenFieldObjDataset.py @@ -0,0 +1,163 @@ +import cv2 +import numpy as np +import os +import random +import skimage.draw +import skimage.io +import torch +from torch.utils.data import Dataset +from torchvision.transforms import ColorJitter +from torchvision.transforms.functional import to_pil_image, to_tensor +import xml.etree.ElementTree as ET + +from utils.xform import centered_transform_mat, random_occlusion + + +def parse_obj_labels(cvat_xml_path): + root = ET.parse(cvat_xml_path) + for image_elem in root.findall('./image'): + img_name = image_elem.attrib['name'] + object_polygon_elems = ( + pl for pl in image_elem.findall('./polygon') + if pl.attrib['label'] == 'object' + ) + + object_polygon_points = [] + for object_polygon_elem in object_polygon_elems: + xy_strs = [ + xy_str.split(',') + for xy_str in object_polygon_elem.attrib['points'].split(';') + ] + assert len(xy_strs) >= 3 + + xy_points = np.array( + [(float(x_str), float(y_str)) for x_str, y_str in xy_strs], + dtype=np.float32, + ) + xy_points = np.transpose(xy_points) + + object_polygon_points.append(xy_points) + + yield { + 'image_name': img_name, + 'object_polygons': object_polygon_points, + } + + +def transform_points(xy_points, xform): + # need a row of 1's for affine transform matrix mult + xy_points_xform = np.concatenate([ + 
xy_points, + np.ones([1, xy_points.shape[1]], dtype=xy_points.dtype)]) + xy_points_xform = xform @ xy_points_xform + + return xy_points_xform[:2, :] + + +class OpenFieldObjDataset(Dataset): + + def __init__(self, cfg, object_labels, is_train, transform=None): + self.cfg = cfg + self.object_labels = object_labels + self.object_indexes = list(self._gen_obj_indexes()) + self.is_train = is_train + self.transform = transform + + def _gen_obj_indexes(self): + for img_index, curr_obj in enumerate(self.object_labels): + for obj_index in range(len(curr_obj['object_polygons'])): + yield { + 'image_index': img_index, + 'object_index': obj_index, + } + + def __len__(self): + return len(self.object_indexes) + + def __getitem__(self, idx): + curr_obj_indexes = self.object_indexes[idx] + image_index = curr_obj_indexes['image_index'] + object_index = curr_obj_indexes['object_index'] + + image_size = np.array(self.cfg.MODEL.IMAGE_SIZE, dtype=np.uint32) + + labels = self.object_labels[image_index] + image_name = labels['image_name'] + object_polygons = labels['object_polygons'] + selected_object_polygon = object_polygons[object_index] + + image_path = os.path.join(self.cfg.DATASET.ROOT, image_name) + data_numpy = skimage.io.imread(image_path, as_gray=True) * 255 + data_numpy = data_numpy.round().astype(np.uint8) + data_numpy = data_numpy[..., np.newaxis] + + center_xy = (selected_object_polygon.min(1) + selected_object_polygon.max(1)) / 2.0 + scale = 1.0 + rot_deg = 0 + + if self.is_train: + sf = self.cfg.DATASET.SCALE_FACTOR + scale *= np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf) + rot_deg = 360 * random.random() if random.random() <= 0.8 else 0 + + prob_randomized_center = self.cfg.DATASET.PROB_RANDOMIZED_CENTER + jitter_center = self.cfg.DATASET.JITTER_CENTER + if prob_randomized_center > 0 and random.random() <= prob_randomized_center: + center_xy[0] = data_numpy.shape[0] * random.random() + center_xy[1] = data_numpy.shape[1] * random.random() + elif jitter_center > 0: + center_xy[0] += image_size[1] * jitter_center * np.random.randn() + center_xy[1] += image_size[0] * jitter_center * np.random.randn() + + if self.cfg.DATASET.FLIP and random.random() <= 0.5: + data_numpy = data_numpy[:, ::-1, :] + center_xy[0] = data_numpy.shape[0] - center_xy[0] - 1 + + for obj_poly in object_polygons: + obj_poly[0, :] = data_numpy.shape[0] - obj_poly[0, :] - 1 + + trans = centered_transform_mat(center_xy, rot_deg, scale, image_size) + img = cv2.warpAffine( + data_numpy, + trans[:2, :], + (image_size[0], image_size[1]), + flags=cv2.INTER_LINEAR) + + if self.is_train: + jitter_brightness = self.cfg.DATASET.JITTER_BRIGHTNESS + jitter_contrast = self.cfg.DATASET.JITTER_CONTRAST + jitter_saturation = self.cfg.DATASET.JITTER_SATURATION + if jitter_brightness > 0 or jitter_contrast > 0 or jitter_saturation > 0: + img = to_pil_image(img) + img = ColorJitter(jitter_brightness, jitter_contrast, jitter_saturation)(img) + img = to_tensor(img).squeeze(0).numpy() + img = (img * 255).astype(np.uint8) + + prob_randomized_occlusion = self.cfg.DATASET.PROB_RANDOMIZED_OCCLUSION + max_occlusion_size = self.cfg.DATASET.MAX_OCCLUSION_SIZE + occlusion_opacities = self.cfg.DATASET.OCCLUSION_OPACITIES + if prob_randomized_occlusion > 0 and random.random() <= prob_randomized_occlusion: + random_occlusion(img, max_occlusion_size, np.random.choice(occlusion_opacities)) + + if self.transform: + img = self.transform(img) + + # image size is width, height which means we reverse the order for creating a numpy array + seg_target = 
torch.zeros(image_size[1], image_size[0], dtype=torch.float32) + for obj_poly in object_polygons: + xformed_obj_poly = transform_points(obj_poly, trans) + + # scikit image expects row followed by column so we give y, x order + rr, cc = skimage.draw.polygon(xformed_obj_poly[1, :], xformed_obj_poly[0, :]) + + # mask out any out-of-bounds indexes + rc_mask = (rr < image_size[1]) & (cc < image_size[0]) + rr = rr[rc_mask] + cc = cc[rc_mask] + + seg_target[rr, cc] = 1.0 + + # add channel dimension + seg_target = seg_target.unsqueeze(0) + + return img, seg_target diff --git a/lib/dataset/__init__.py b/lib/dataset/__init__.py new file mode 100644 index 0000000..d1904b3 --- /dev/null +++ b/lib/dataset/__init__.py @@ -0,0 +1,13 @@ +# ------------------------------------------------------------------------------ +# Copyright (c) Microsoft +# Licensed under the MIT License. +# Written by Bin Xiao (Bin.Xiao@microsoft.com) +# ------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +# from .mpii import MPIIDataset as mpii +# from .coco import COCODataset as coco +from .hdf5mousepose import HDF5MousePose as hdf5mousepose diff --git a/lib/dataset/coco.py b/lib/dataset/coco.py new file mode 100755 index 0000000..b8bad40 --- /dev/null +++ b/lib/dataset/coco.py @@ -0,0 +1,445 @@ +# ------------------------------------------------------------------------------ +# Copyright (c) Microsoft +# Licensed under the MIT License. +# Written by Bin Xiao (Bin.Xiao@microsoft.com) +# ------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import defaultdict +from collections import OrderedDict +import logging +import os + +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +import json_tricks as json +import numpy as np + +from dataset.JointsDataset import JointsDataset +from nms.nms import oks_nms +from nms.nms import soft_oks_nms + + +logger = logging.getLogger(__name__) + + +class COCODataset(JointsDataset): + ''' + "keypoints": { + 0: "nose", + 1: "left_eye", + 2: "right_eye", + 3: "left_ear", + 4: "right_ear", + 5: "left_shoulder", + 6: "right_shoulder", + 7: "left_elbow", + 8: "right_elbow", + 9: "left_wrist", + 10: "right_wrist", + 11: "left_hip", + 12: "right_hip", + 13: "left_knee", + 14: "right_knee", + 15: "left_ankle", + 16: "right_ankle" + }, + "skeleton": [ + [16,14],[14,12],[17,15],[15,13],[12,13],[6,12],[7,13], [6,7],[6,8], + [7,9],[8,10],[9,11],[2,3],[1,2],[1,3],[2,4],[3,5],[4,6],[5,7]] + ''' + def __init__(self, cfg, root, image_set, is_train, transform=None): + super().__init__(cfg, root, image_set, is_train, transform) + self.nms_thre = cfg.TEST.NMS_THRE + self.image_thre = cfg.TEST.IMAGE_THRE + self.soft_nms = cfg.TEST.SOFT_NMS + self.oks_thre = cfg.TEST.OKS_THRE + self.in_vis_thre = cfg.TEST.IN_VIS_THRE + self.bbox_file = cfg.TEST.COCO_BBOX_FILE + self.use_gt_bbox = cfg.TEST.USE_GT_BBOX + self.image_width = cfg.MODEL.IMAGE_SIZE[0] + self.image_height = cfg.MODEL.IMAGE_SIZE[1] + self.aspect_ratio = self.image_width * 1.0 / self.image_height + self.pixel_std = 200 + + self.coco = COCO(self._get_ann_file_keypoint()) + + # deal with class names + cats = [cat['name'] + for cat in self.coco.loadCats(self.coco.getCatIds())] + self.classes = ['__background__'] + cats + logger.info('=> classes: 
{}'.format(self.classes)) + self.num_classes = len(self.classes) + self._class_to_ind = dict(zip(self.classes, range(self.num_classes))) + self._class_to_coco_ind = dict(zip(cats, self.coco.getCatIds())) + self._coco_ind_to_class_ind = dict( + [ + (self._class_to_coco_ind[cls], self._class_to_ind[cls]) + for cls in self.classes[1:] + ] + ) + + # load image file names + self.image_set_index = self._load_image_set_index() + self.num_images = len(self.image_set_index) + logger.info('=> num_images: {}'.format(self.num_images)) + + self.num_joints = 17 + self.flip_pairs = [[1, 2], [3, 4], [5, 6], [7, 8], + [9, 10], [11, 12], [13, 14], [15, 16]] + self.parent_ids = None + self.upper_body_ids = (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10) + self.lower_body_ids = (11, 12, 13, 14, 15, 16) + + self.joints_weight = np.array( + [ + 1., 1., 1., 1., 1., 1., 1., 1.2, 1.2, + 1.5, 1.5, 1., 1., 1.2, 1.2, 1.5, 1.5 + ], + dtype=np.float32 + ).reshape((self.num_joints, 1)) + + self.db = self._get_db() + + if is_train and cfg.DATASET.SELECT_DATA: + self.db = self.select_data(self.db) + + logger.info('=> load {} samples'.format(len(self.db))) + + def _get_ann_file_keypoint(self): + """ self.root / annotations / person_keypoints_train2017.json """ + prefix = 'person_keypoints' \ + if 'test' not in self.image_set else 'image_info' + return os.path.join( + self.root, + 'annotations', + prefix + '_' + self.image_set + '.json' + ) + + def _load_image_set_index(self): + """ image id: int """ + image_ids = self.coco.getImgIds() + return image_ids + + def _get_db(self): + if self.is_train or self.use_gt_bbox: + # use ground truth bbox + gt_db = self._load_coco_keypoint_annotations() + else: + # use bbox from detection + gt_db = self._load_coco_person_detection_results() + return gt_db + + def _load_coco_keypoint_annotations(self): + """ ground truth bbox and keypoints """ + gt_db = [] + for index in self.image_set_index: + gt_db.extend(self._load_coco_keypoint_annotation_kernal(index)) + return gt_db + + def _load_coco_keypoint_annotation_kernal(self, index): + """ + coco ann: [u'segmentation', u'area', u'iscrowd', u'image_id', u'bbox', u'category_id', u'id'] + iscrowd: + crowd instances are handled by marking their overlaps with all categories to -1 + and later excluded in training + bbox: + [x1, y1, w, h] + :param index: coco image id + :return: db entry + """ + im_ann = self.coco.loadImgs(index)[0] + width = im_ann['width'] + height = im_ann['height'] + + annIds = self.coco.getAnnIds(imgIds=index, iscrowd=False) + objs = self.coco.loadAnns(annIds) + + # sanitize bboxes + valid_objs = [] + for obj in objs: + x, y, w, h = obj['bbox'] + x1 = np.max((0, x)) + y1 = np.max((0, y)) + x2 = np.min((width - 1, x1 + np.max((0, w - 1)))) + y2 = np.min((height - 1, y1 + np.max((0, h - 1)))) + if obj['area'] > 0 and x2 >= x1 and y2 >= y1: + obj['clean_bbox'] = [x1, y1, x2-x1, y2-y1] + valid_objs.append(obj) + objs = valid_objs + + rec = [] + for obj in objs: + cls = self._coco_ind_to_class_ind[obj['category_id']] + if cls != 1: + continue + + # ignore objs without keypoints annotation + if max(obj['keypoints']) == 0: + continue + + joints_3d = np.zeros((self.num_joints, 3), dtype=np.float) + joints_3d_vis = np.zeros((self.num_joints, 3), dtype=np.float) + for ipt in range(self.num_joints): + joints_3d[ipt, 0] = obj['keypoints'][ipt * 3 + 0] + joints_3d[ipt, 1] = obj['keypoints'][ipt * 3 + 1] + joints_3d[ipt, 2] = 0 + t_vis = obj['keypoints'][ipt * 3 + 2] + if t_vis > 1: + t_vis = 1 + joints_3d_vis[ipt, 0] = t_vis + joints_3d_vis[ipt, 1] = 
t_vis + joints_3d_vis[ipt, 2] = 0 + + center, scale = self._box2cs(obj['clean_bbox'][:4]) + rec.append({ + 'image': self.image_path_from_index(index), + 'center': center, + 'scale': scale, + 'joints_3d': joints_3d, + 'joints_3d_vis': joints_3d_vis, + 'filename': '', + 'imgnum': 0, + }) + + return rec + + def _box2cs(self, box): + x, y, w, h = box[:4] + return self._xywh2cs(x, y, w, h) + + def _xywh2cs(self, x, y, w, h): + center = np.zeros((2), dtype=np.float32) + center[0] = x + w * 0.5 + center[1] = y + h * 0.5 + + if w > self.aspect_ratio * h: + h = w * 1.0 / self.aspect_ratio + elif w < self.aspect_ratio * h: + w = h * self.aspect_ratio + scale = np.array( + [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std], + dtype=np.float32) + if center[0] != -1: + scale = scale * 1.25 + + return center, scale + + def image_path_from_index(self, index): + """ example: images / train2017 / 000000119993.jpg """ + file_name = '%012d.jpg' % index + if '2014' in self.image_set: + file_name = 'COCO_%s_' % self.image_set + file_name + + prefix = 'test2017' if 'test' in self.image_set else self.image_set + + data_name = prefix + '.zip@' if self.data_format == 'zip' else prefix + + image_path = os.path.join( + self.root, 'images', data_name, file_name) + + return image_path + + def _load_coco_person_detection_results(self): + all_boxes = None + with open(self.bbox_file, 'r') as f: + all_boxes = json.load(f) + + if not all_boxes: + logger.error('=> Load %s fail!' % self.bbox_file) + return None + + logger.info('=> Total boxes: {}'.format(len(all_boxes))) + + kpt_db = [] + num_boxes = 0 + for n_img in range(0, len(all_boxes)): + det_res = all_boxes[n_img] + if det_res['category_id'] != 1: + continue + img_name = self.image_path_from_index(det_res['image_id']) + box = det_res['bbox'] + score = det_res['score'] + + if score < self.image_thre: + continue + + num_boxes = num_boxes + 1 + + center, scale = self._box2cs(box) + joints_3d = np.zeros((self.num_joints, 3), dtype=np.float) + joints_3d_vis = np.ones( + (self.num_joints, 3), dtype=np.float) + kpt_db.append({ + 'image': img_name, + 'center': center, + 'scale': scale, + 'score': score, + 'joints_3d': joints_3d, + 'joints_3d_vis': joints_3d_vis, + }) + + logger.info('=> Total boxes after fliter low score@{}: {}'.format( + self.image_thre, num_boxes)) + return kpt_db + + def evaluate(self, cfg, preds, output_dir, all_boxes, img_path, + *args, **kwargs): + rank = cfg.RANK + + res_folder = os.path.join(output_dir, 'results') + if not os.path.exists(res_folder): + try: + os.makedirs(res_folder) + except Exception: + logger.error('Fail to make {}'.format(res_folder)) + + res_file = os.path.join( + res_folder, 'keypoints_{}_results_{}.json'.format( + self.image_set, rank) + ) + + # person x (keypoints) + _kpts = [] + for idx, kpt in enumerate(preds): + _kpts.append({ + 'keypoints': kpt, + 'center': all_boxes[idx][0:2], + 'scale': all_boxes[idx][2:4], + 'area': all_boxes[idx][4], + 'score': all_boxes[idx][5], + 'image': int(img_path[idx][-16:-4]) + }) + # image x person x (keypoints) + kpts = defaultdict(list) + for kpt in _kpts: + kpts[kpt['image']].append(kpt) + + # rescoring and oks nms + num_joints = self.num_joints + in_vis_thre = self.in_vis_thre + oks_thre = self.oks_thre + oks_nmsed_kpts = [] + for img in kpts.keys(): + img_kpts = kpts[img] + for n_p in img_kpts: + box_score = n_p['score'] + kpt_score = 0 + valid_num = 0 + for n_jt in range(0, num_joints): + t_s = n_p['keypoints'][n_jt][2] + if t_s > in_vis_thre: + kpt_score = kpt_score + t_s + valid_num = 
valid_num + 1 + if valid_num != 0: + kpt_score = kpt_score / valid_num + # rescoring + n_p['score'] = kpt_score * box_score + + if self.soft_nms: + keep = soft_oks_nms( + [img_kpts[i] for i in range(len(img_kpts))], + oks_thre + ) + else: + keep = oks_nms( + [img_kpts[i] for i in range(len(img_kpts))], + oks_thre + ) + + if len(keep) == 0: + oks_nmsed_kpts.append(img_kpts) + else: + oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep]) + + self._write_coco_keypoint_results( + oks_nmsed_kpts, res_file) + if 'test' not in self.image_set: + info_str = self._do_python_keypoint_eval( + res_file, res_folder) + name_value = OrderedDict(info_str) + return name_value, name_value['AP'] + else: + return {'Null': 0}, 0 + + def _write_coco_keypoint_results(self, keypoints, res_file): + data_pack = [ + { + 'cat_id': self._class_to_coco_ind[cls], + 'cls_ind': cls_ind, + 'cls': cls, + 'ann_type': 'keypoints', + 'keypoints': keypoints + } + for cls_ind, cls in enumerate(self.classes) if not cls == '__background__' + ] + + results = self._coco_keypoint_results_one_category_kernel(data_pack[0]) + logger.info('=> writing results json to %s' % res_file) + with open(res_file, 'w') as f: + json.dump(results, f, sort_keys=True, indent=4) + try: + json.load(open(res_file)) + except Exception: + content = [] + with open(res_file, 'r') as f: + for line in f: + content.append(line) + content[-1] = ']' + with open(res_file, 'w') as f: + for c in content: + f.write(c) + + def _coco_keypoint_results_one_category_kernel(self, data_pack): + cat_id = data_pack['cat_id'] + keypoints = data_pack['keypoints'] + cat_results = [] + + for img_kpts in keypoints: + if len(img_kpts) == 0: + continue + + _key_points = np.array([img_kpts[k]['keypoints'] + for k in range(len(img_kpts))]) + key_points = np.zeros( + (_key_points.shape[0], self.num_joints * 3), dtype=np.float + ) + + for ipt in range(self.num_joints): + key_points[:, ipt * 3 + 0] = _key_points[:, ipt, 0] + key_points[:, ipt * 3 + 1] = _key_points[:, ipt, 1] + key_points[:, ipt * 3 + 2] = _key_points[:, ipt, 2] # keypoints score. + + result = [ + { + 'image_id': img_kpts[k]['image'], + 'category_id': cat_id, + 'keypoints': list(key_points[k]), + 'score': img_kpts[k]['score'], + 'center': list(img_kpts[k]['center']), + 'scale': list(img_kpts[k]['scale']) + } + for k in range(len(img_kpts)) + ] + cat_results.extend(result) + + return cat_results + + def _do_python_keypoint_eval(self, res_file, res_folder): + coco_dt = self.coco.loadRes(res_file) + coco_eval = COCOeval(self.coco, coco_dt, 'keypoints') + coco_eval.params.useSegm = None + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + + stats_names = ['AP', 'Ap .5', 'AP .75', 'AP (M)', 'AP (L)', 'AR', 'AR .5', 'AR .75', 'AR (M)', 'AR (L)'] + + info_str = [] + for ind, name in enumerate(stats_names): + info_str.append((name, coco_eval.stats[ind])) + + return info_str diff --git a/lib/dataset/cornerdataset.py b/lib/dataset/cornerdataset.py new file mode 100644 index 0000000..493bc29 --- /dev/null +++ b/lib/dataset/cornerdataset.py @@ -0,0 +1,271 @@ +# NOTE: +# This code is based on hdf5mousepose.py code. It was named +# hdf5mousepose.py by Massimo. 
I (KSS) renamed it so that the original +# hdf5mousepose code will work as is but that means that this code will +# not work until it is reintegrated back into the codebase + +from collections import OrderedDict +import copy +import cv2 +import logging +import h5py +import numpy as np +import random +import torch +from torchvision.transforms import ColorJitter +from torchvision.transforms.functional import to_pil_image, to_tensor + +from dataset.JointsDataset import JointsDataset +from utils.transforms import affine_transform +from utils.transforms import fliplr_joints +from utils.xform import centered_transform_mat, random_occlusion + +import matplotlib.pyplot as plt + +logger = logging.getLogger(__name__) + + +class CornerDataset(JointsDataset): + + def __init__(self, cfg, root, image_set, is_train, transform=None): + super().__init__(cfg, root, image_set, is_train, transform) + + self.prob_randomized_occlusion = cfg.DATASET.PROB_RANDOMIZED_OCCLUSION + self.max_occlusion_size = cfg.DATASET.MAX_OCCLUSION_SIZE + self.occlusion_opacities = cfg.DATASET.OCCLUSION_OPACITIES + self.prob_randomized_center = cfg.DATASET.PROB_RANDOMIZED_CENTER + self.jitter_center = cfg.DATASET.JITTER_CENTER + self.jitter_brightness = cfg.DATASET.JITTER_BRIGHTNESS + self.jitter_contrast = cfg.DATASET.JITTER_CONTRAST + self.jitter_saturation = cfg.DATASET.JITTER_SATURATION + + self.heatmap = np.array(cfg.MODEL.HEATMAP_SIZE) + + self.num_joints = 1 + # Changed this to 1 + + self.db = self._get_db() + + def _get_db(self): + + def gen_db(): + with h5py.File(self.root, 'r') as hdf5file: + if self.image_set in hdf5file: + for name, group in hdf5file[self.image_set].items(): + if 'frames' in group and 'points' in group: + points = group['points'] + # center_x = ((points[0, 0, 0] + points[0, 0, 1]) / 2) + # center_y = ((points[0, 1, 0] + points[0, 1, 1]) / 2) + # center = np.array([center_x, center_y], dtype=np.float32) + + for i in range(4): + + yield { + 'image_name': name, + 'object_index': i, + 'center': np.array(points[0, i, :], dtype=np.float32) + } + + # for grp_frame_index in range(points.shape[0]): + # grp_frame_pts = points[grp_frame_index, ...] + # max_x, max_y = np.amax(grp_frame_pts, axis=0) + # min_x, min_y = np.amin(grp_frame_pts, axis=0) + + # width = max_x - min_x + # height = max_y - min_y + + # center_x = (max_x + min_x) / 2 + # center_y = (max_y + min_y) / 2 + # center_xy = np.array([center_x, center_y], dtype=np.float32) + # scale = np.array( + # [ + # width * 1.0 / self.pixel_std, + # height * 1.0 / self.pixel_std, + # ], + # dtype=np.float32) + # scale_range = 0.4 + # scale = 1 + np.random.random([2]) * scale_range - scale_range / 2 + # scale = np.ones([2], dtype=np.float32) + + # joints_3d = np.zeros((self.num_joints, 3), dtype=np.float) + # joints_3d[:, :2] = grp_frame_pts + + return list(gen_db()) + # + # def _gen_obj_indexes(self): + # for img_index, curr_obj in enumerate(self.object_labels): + # for obj_index in range(4): + # yield { + # 'image_index': img_index, + # 'object_index': obj_index, + # } + + def __len__(self): + return len(self.db) + + def generate_target(self, center, joints_vis): + ''' + :param center: [num_joints, 3] + :param joints_vis: [num_joints, 3] + :return: target, target_weight(1: visible, 0: invisible) + ''' + + target_weight = np.ones((self.num_joints, 1), dtype=np.float32) + target_weight[0] = joints_vis[0] + + assert self.target_type == 'gaussian', \ + 'Only support gaussian map now!' 
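The 'gaussian' branch that follows renders an unnormalized 2D Gaussian (peak value 1.0) into the heatmap, restricted to a (6*sigma + 1)-wide patch as an optimization. Evaluated over the full map, a near-equivalent target (the patched version truncates values below roughly e^-4.5) is this standalone sketch; heatmap size and sigma here are illustrative:

    import numpy as np

    def render_gaussian(center_xy, heatmap_size=(64, 64), sigma=2.0):
        w, h = heatmap_size
        xs = np.arange(w, dtype=np.float32)            # column coordinates
        ys = np.arange(h, dtype=np.float32)[:, None]   # row coordinates
        # Unnormalized Gaussian: the center value equals 1 by construction.
        return np.exp(-((xs - center_xy[0]) ** 2 + (ys - center_xy[1]) ** 2)
                      / (2 * sigma ** 2))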
+ + if self.target_type == 'gaussian': + target = np.zeros((self.heatmap_size[1], + self.heatmap_size[0]), + dtype=np.float32) + + tmp_size = self.sigma * 3 + + feat_stride = self.image_size / self.heatmap_size + mu_x = int(center[0] / feat_stride[0] + 0.5) + mu_y = int(center[1] / feat_stride[1] + 0.5) + # Check that any part of the gaussian is in-bounds + ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] + br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] + if ul[0] >= self.heatmap_size[0] or ul[1] >= self.heatmap_size[1] \ + or br[0] < 0 or br[1] < 0: + # If not, just return the image as is + target_weight = 0 + + # # Generate gaussian + size = 2 * tmp_size + 1 + x = np.arange(0, size, 1, np.float32) + y = x[:, np.newaxis] + x0 = y0 = size // 2 + # The gaussian is not normalized, we want the center value to equal 1 + g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * self.sigma ** 2)) + + # Usable gaussian range + g_x = max(0, -ul[0]), min(br[0], self.heatmap_size[0]) - ul[0] + g_y = max(0, -ul[1]), min(br[1], self.heatmap_size[1]) - ul[1] + # Image range + img_x = max(0, ul[0]), min(br[0], self.heatmap_size[0]) + img_y = max(0, ul[1]), min(br[1], self.heatmap_size[1]) + + v = target_weight + if v > 0.5: + target[img_y[0]:img_y[1], img_x[0]:img_x[1]] = \ + g[g_y[0]:g_y[1], g_x[0]:g_x[1]] + + if self.use_different_joints_weight: + target_weight = np.multiply(target_weight, self.joints_weight) + + return target, target_weight + + def __getitem__(self, idx): + db_rec = copy.deepcopy(self.db[idx]) + + filename = db_rec['filename'] if 'filename' in db_rec else '' + imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else '' + img_grp_name = db_rec['image_name'] + data_numpy = None + with h5py.File(self.root, 'r') as hdf5file: + data_numpy = hdf5file[self.image_set][img_grp_name]['frames'][0, ...] 
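For context: this reader, like hdf5mousepose.py later in this patch, expects each group under the split name to carry a 'frames' image stack and a 'points' keypoint array. The shapes below are inferred from the indexing used here (points[0, i, :] is corner i of frame 0) and are an assumption, not a documented schema:

    import h5py

    # Assumed layout (inferred, not documented):
    #   /<image_set>/<group>/frames -> [num_frames, H, W, 1] grayscale frames
    #   /<image_set>/<group>/points -> [num_frames, num_points, 2] (x, y)
    with h5py.File('corners.h5', 'r') as f:   # file name is illustrative
        for name, grp in f['train'].items():
            if 'frames' in grp and 'points' in grp:
                print(name, grp['frames'].shape, grp['points'].shape)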
+ + if data_numpy is None: + logger.error('=> fail to read {}'.format(db_rec['image_name'])) + raise ValueError('Fail to read {}'.format(db_rec['image_name'])) + + # joints = db_rec['joints_3d'] + # joints_vis = db_rec['joints_3d_vis'] + + corner_loc = db_rec['center'].copy() + cam_loc = db_rec['center'].copy() + + # s = db_rec['scale'] + # score = db_rec['score'] if 'score' in db_rec else 1 + r = 0 + + # JITTER CENTER NEW + # a = np.random.randint(0, 100) + # if a > 50: + cam_loc[0] += np.random.uniform(-50, 50) + + # b = np.random.randint(0, 100) + # if b > 50: + cam_loc[1] += np.random.uniform(-50, 50) + + if self.is_train: + sf = self.scale_factor + # s *= np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf) + r = 360 * random.random() if random.random() <= 0.8 else 0 + + # if self.prob_randomized_center > 0 and random.random() <= self.prob_randomized_center: + # c[0] = data_numpy.shape[1] * random.random() + # c[1] = data_numpy.shape[0] * random.random() + + trans = centered_transform_mat(cam_loc, r, 1, self.image_size) + input = cv2.warpAffine( + data_numpy, + trans[:2, :], + (int(self.image_size[0]), int(self.image_size[1])), + flags=cv2.INTER_LINEAR) + + if self.is_train: + if self.jitter_brightness > 0 or self.jitter_contrast > 0 or self.jitter_saturation > 0: + input = to_pil_image(input) + input = ColorJitter(self.jitter_brightness, self.jitter_contrast, self.jitter_saturation)(input) + input = to_tensor(input).squeeze(0).numpy() + input = (input * 255).astype(np.uint8) + + if self.prob_randomized_occlusion > 0 and random.random() <= self.prob_randomized_occlusion: + random_occlusion(input, self.max_occlusion_size, np.random.choice(self.occlusion_opacities)) + + if self.transform: + input = self.transform(input) + + joints_vis = np.ones((self.num_joints, 1), dtype=np.float32) + + if np.all(joints_vis > 0.0): + corner_loc[0:2] = affine_transform(corner_loc[0:2], trans) + + # target_weight = np.ones((1, self.heatmap[1], self.heatmap[0]), dtype=np.float32) + + # joints_vis = np.ones((self.num_joints, 3), dtype=np.float32) + # joints = np.zeros((self.num_joints, 3), dtype=np.float32) + + target, target_weight = self.generate_target(corner_loc, joints_vis) + # self.generate_target(joints, joints_vis) + # ???? how to gen weights? 
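To the question in the comment above: for the JointsMSELoss path (loss.py in this patch), target_weight is a [num_joints, 1] per-joint multiplier, so generating it from visibility flags is sufficient; a joint weighted 0 (for example because its Gaussian fell entirely off the heatmap) contributes nothing to the loss:

    import numpy as np

    num_joints = 1
    joints_vis = np.ones((num_joints, 1), dtype=np.float32)  # 1 = visible
    target_weight = joints_vis.copy()
    target_weight[0] = 0.0   # masked joint
    # JointsMSELoss computes criterion(pred * w, gt * w) per joint, so a
    # zero weight zeroes both operands and removes that joint's MSE term.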
+ + # if not torch.is_tensor(target): + # target = torch.from_numpy(target) + # if not torch.is_tensor(target_weight): + # target_weight = torch.from_numpy(target_weight) + + meta = { + 'image': db_rec['image_name'], + 'filename': filename, + 'imgnum': imgnum, + # 'joints': joints, + # 'joints_vis': joints_vis, + 'center': cam_loc, + # 'scale': s, + 'rotation': r, + # 'score': score + } + + input = torch.stack([input.squeeze(0)] * 3) + + # plt.imshow(input[0]) + # print('target.shape:', target.shape, target.dtype) + # plt.imshow(target) + # plt.show() + _, axs = plt.subplots(1, 4, figsize=(12, 6)) + axs[0].imshow(input[0], aspect='equal') + axs[1].imshow(input[1], aspect='equal') + axs[2].imshow(input[2], aspect='equal') + axs[3].imshow(target, aspect='equal') + plt.show() + + print('input min max mean:', input.min(), input.max(), input.mean()) + print('target min max mean:', target.min(), target.max(), target.mean()) + + return input, target, target_weight, meta diff --git a/lib/dataset/fecalbolidata.py b/lib/dataset/fecalbolidata.py new file mode 100644 index 0000000..a99831e --- /dev/null +++ b/lib/dataset/fecalbolidata.py @@ -0,0 +1,301 @@ +import cv2 +import numpy as np +import os +import re +import skimage.io +import torch +from torch.utils.data import Dataset +from torchvision.transforms import ColorJitter +from torchvision.transforms.functional import to_pil_image, to_tensor +import xml.etree.ElementTree as ET + +from utils.transforms import affine_transform +from utils.xform import centered_transform_mat, random_occlusion + + +def parse_fecal_boli_labels(cvat_xml_path): + root = ET.parse(cvat_xml_path) + for image_elem in root.findall('./image'): + img_name = image_elem.attrib['name'] + points_elems = ( + pl for pl in image_elem.findall('./points') + if pl.attrib['label'] == 'Bolus' + ) + + fecal_boli_xy_strs = [] + for points_elem in points_elems: + fecal_boli_xy_strs += [ + xy_str.split(',') + for xy_str in points_elem.attrib['points'].split(';') + ] + + assert len(fecal_boli_xy_strs) + + fecal_boli_xy = np.array(fecal_boli_xy_strs, dtype=np.float) + yield { + 'image_name': img_name, + 'fecal_boli_xy': fecal_boli_xy, + } + + +def transform_points(xy_points, xform): + # need a row of 1's for affine transform matrix mult + xy_points_xform = np.concatenate([ + xy_points, + np.ones([1, xy_points.shape[1]], dtype=xy_points.dtype)]) + xy_points_xform = xform @ xy_points_xform + + return xy_points_xform[:2, :] + + +def _read_image(image_path): + data_numpy = skimage.io.imread(image_path, as_gray=True) * 255 + + data_numpy = data_numpy.round().astype(np.uint8) + data_numpy = data_numpy[..., np.newaxis] + + return data_numpy + + +class FecalBoliDataset(Dataset): + + def __init__(self, cfg, image_dir, fecal_boli_labels, is_train, transform=None): + self.cfg = cfg + self.image_dir = image_dir + self.fecal_boli_labels = fecal_boli_labels + self.is_train = is_train + self.transform = transform + + self.scale_factor = cfg.DATASET.SCALE_FACTOR + self.flip = cfg.DATASET.FLIP + + self.image_size = np.array(cfg.MODEL.IMAGE_SIZE) + self.heatmap_size = np.array(cfg.MODEL.HEATMAP_SIZE) + self.sigma = cfg.MODEL.SIGMA + self.target_type = cfg.MODEL.TARGET_TYPE + self.model_extra = cfg.MODEL.EXTRA + + def __len__(self): + return len(self.fecal_boli_labels) + + def _gen_heatmap(self, fecal_boli_xy): + target = np.zeros( + (1, self.heatmap_size[1], self.heatmap_size[0]), + dtype=np.float32) + + # build target heatmap where each point is the center of a 2D gaussian + if self.target_type == 'gaussian': + 
tmp_size = self.sigma * 3 + + # # TODO can we add sub-pixel precision here? + # for joint_id in range(1): + # for pose_inst in pose_instances: + for fbolus_xy in fecal_boli_xy: + feat_stride = self.image_size / self.heatmap_size + mu_x = int(fbolus_xy[0] / feat_stride[0] + 0.5) + mu_y = int(fbolus_xy[1] / feat_stride[1] + 0.5) + # Check that any part of the gaussian is in-bounds + ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] + br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] + if ul[0] >= self.heatmap_size[0] or ul[1] >= self.heatmap_size[1] \ + or br[0] < 0 or br[1] < 0: + # If not, just return the image as is + continue + + # # Generate gaussian + size = 2 * tmp_size + 1 + x = np.arange(0, size, 1, np.float32) + y = x[:, np.newaxis] + x0 = y0 = size // 2 + # The gaussian is not normalized, we want the center value to equal 1 + g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * self.sigma ** 2)) + + # Usable gaussian range + g_x = max(0, -ul[0]), min(br[0], self.heatmap_size[0]) - ul[0] + g_y = max(0, -ul[1]), min(br[1], self.heatmap_size[1]) - ul[1] + # Image range + img_x = max(0, ul[0]), min(br[0], self.heatmap_size[0]) + img_y = max(0, ul[1]), min(br[1], self.heatmap_size[1]) + + target[0, img_y[0]:img_y[1], img_x[0]:img_x[1]] = np.maximum( + g[g_y[0]:g_y[1], g_x[0]:g_x[1]], + target[0, img_y[0]:img_y[1], img_x[0]:img_x[1]]) + + # build target heatmap where each point is the center of a 2D exponential + # decay function + elif self.target_type == 'exp_decay': + + # for now we require image_size and heatmap_size to be the + # same, but we can change code to allow different sizes + # later if needed + assert np.all(self.image_size == self.heatmap_size) + + img_width, img_height = self.image_size + + # Each heat patch will just be a small square to save on + # compute but large enough to allow almost all decay. For + # a lambda of 1 a distance of 3 should be sufficient: (e^-3 = ~0.05). 
+ # We just need to scale this by 1/exp_decay_lambda to + # make it work for any lambda + EXP_DECAY_PATCH_SIZE_FACTOR = 4 + exp_decay_lambda = self.model_extra['EXP_DECAY_LAMBDA'] + heat_patch_size = EXP_DECAY_PATCH_SIZE_FACTOR / exp_decay_lambda + + # # for each joint within a pose instance we calculate a pose + # # heatmap patch which will be a 2D exponential decay and then + # # apply that patch to the target heatmap + # for joint_id in range(1): + # for pose_inst in pose_instances: + for fbolus_xy in fecal_boli_xy: + + mu_x = fbolus_xy[0] + mu_y = fbolus_xy[1] + + start_x = int(max(np.floor(mu_x - heat_patch_size), 0)) + start_y = int(max(np.floor(mu_y - heat_patch_size), 0)) + + stop_x = int(min(np.ceil(mu_x + heat_patch_size + 1), img_width)) + stop_y = int(min(np.ceil(mu_y + heat_patch_size + 1), img_height)) + + if start_x < stop_x and start_y < stop_y: + patch_width = stop_x - start_x + patch_height = stop_y - start_y + + x = np.arange(start_x, stop_x) - mu_x + y = np.arange(start_y, stop_y) - mu_y + + x_mat = np.tile(x, patch_height).reshape(patch_height, patch_width) + y_mat = np.tile(y, patch_width).reshape(patch_width, patch_height).T + + xy_mat = np.stack([x_mat, y_mat], axis=2) + dist_mat = np.linalg.norm(xy_mat, axis=2) + decay_mat = np.exp(-exp_decay_lambda * dist_mat) + + # we apply our 2D exponential decay patch to the target heatmap + # but we do it using maximum so that we get the desired result + # for overlapping patches + target[0, start_y:stop_y, start_x:stop_x] = np.maximum( + decay_mat, + target[0, start_y:stop_y, start_x:stop_x]) + + # build target heatmap where each point is a single pixel set to 1.0 + elif self.target_type == 'point': + + # for now we require image_size and heatmap_size to be the + # same, but we can change code to allow different sizes + # later if needed + assert np.all(self.image_size == self.heatmap_size) + + img_width, img_height = self.image_size + + # # for each joint within a pose instance we set the joint x,y to 1.0 + # for joint_id in range(self.point_class_count): + # for pose_inst in pose_instances: + for fbolus_xy in fecal_boli_xy: + + mu_x = int(round(fbolus_xy[0])) + mu_y = int(round(fbolus_xy[1])) + + # print(type(joint_id), type(mu_y), type(mu_x)) + # print(joint_id, mu_y, mu_x) + if 0 <= mu_x < img_width and 0 <= mu_y < img_height: + target[0, mu_y, mu_x] = 1.0 + + # if we reach this else we've been given a target type that we don't + # know how to deal with + else: + raise Exception('unexpected target type: {}'.format(self.target_type)) + + return torch.tensor(target, dtype=torch.float32) + + def __getitem__(self, idx): + + fecal_boli_label = self.fecal_boli_labels[idx] + image_name = fecal_boli_label['image_name'] + fecal_boli_xy = fecal_boli_label['fecal_boli_xy'].copy() + + image_size = np.array(self.cfg.MODEL.IMAGE_SIZE, dtype=np.uint32) + + image_path = os.path.join(self.image_dir, image_name) + data_numpy = _read_image(image_path) + + # pick a random point between the min and max points for + # the center_xy + min_xy = fecal_boli_xy.min(0) + max_xy = fecal_boli_xy.max(0) + diff_xy = max_xy - min_xy + center_xy = min_xy + diff_xy * np.random.rand(2) + + scale = self.cfg.DATASET.SCALE + rot_deg = 0 + + if self.is_train: + sf = self.cfg.DATASET.SCALE_FACTOR + scale *= np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf) + rot_deg = 360 * np.random.random() if np.random.random() <= 0.8 else 0 + + if self.cfg.DATASET.FLIP and np.random.random() <= 0.5: + # reflect the pixels along the X axis + data_numpy = data_numpy[:, 
::-1, ...] + + # center X needs to be adjusted along with all of the Xs in fecal_boli_xy + center_xy[0] = (data_numpy.shape[1] - 1) - center_xy[0] + fecal_boli_xy[:, 0] = (data_numpy.shape[1] - 1) - fecal_boli_xy[:, 0] + + trans = centered_transform_mat(center_xy, rot_deg, scale, image_size) + img = cv2.warpAffine( + data_numpy, + trans[:2, :], + (image_size[0], image_size[1]), + flags=cv2.INTER_LINEAR) + + # for training data we throw in some image augmentation: + # brightness, contrast + if self.is_train: + jitter_brightness = self.cfg.DATASET.JITTER_BRIGHTNESS + jitter_contrast = self.cfg.DATASET.JITTER_CONTRAST + if jitter_brightness > 0 or jitter_contrast > 0: + img = to_pil_image(img) + img = ColorJitter(jitter_brightness, jitter_contrast)(img) + img = to_tensor(img).numpy() + img = (img * 255).astype(np.uint8) + + prob_randomized_occlusion = self.cfg.DATASET.PROB_RANDOMIZED_OCCLUSION + max_occlusion_size = self.cfg.DATASET.MAX_OCCLUSION_SIZE + occlusion_opacities = self.cfg.DATASET.OCCLUSION_OPACITIES + if prob_randomized_occlusion > 0 and np.random.random() <= prob_randomized_occlusion: + random_occlusion(img, max_occlusion_size, np.random.choice(occlusion_opacities)) + else: + if img.ndim == 3: + img = np.stack([img[:, :, i] for i in range(img.shape[2])]) + + img = torch.from_numpy(img).to(torch.float32) / 255 + if img.dim() == 2: + img = img.unsqueeze(0) + + # if we were provided an image augmentation in the constructor we use it here + if self.transform: + img = self.transform(img) + + # # Batchs need to allow a varying number of pose instances per sample + # # but pytorch doesn't support jagged/ragged tensors. Instead + # # we pack the poses into a (possibly) oversized tensor and include the + # # instance count in the return value. This does waste some space but + # # in the scheme of things it's negligible. + # pose_instances = [ + # transform_points(p.transpose(), trans).transpose() + # for p in pose_instances + # ] + # pose_instances_tensor = torch.empty( + # self.max_instance_count, 1, 2, + # dtype=torch.float32) + # for i, pose_instance in enumerate(pose_instances): + # pose_instances_tensor[i, ...] 
= torch.from_numpy(pose_instance) + + fecal_boli_xy = transform_points(fecal_boli_xy.transpose(), trans).transpose() + heatmap = self._gen_heatmap(fecal_boli_xy) + + return { + 'image': img, + 'heatmap': heatmap, + } diff --git a/lib/dataset/hdf5mousepose.py b/lib/dataset/hdf5mousepose.py new file mode 100644 index 0000000..541e4f5 --- /dev/null +++ b/lib/dataset/hdf5mousepose.py @@ -0,0 +1,191 @@ +from collections import OrderedDict +import copy +import cv2 +import logging +import h5py +import numpy as np +import random +import torch +from torchvision.transforms import ColorJitter +from torchvision.transforms.functional import to_pil_image, to_tensor + +from dataset.JointsDataset import JointsDataset +from utils.transforms import affine_transform +from utils.transforms import fliplr_joints +from utils.xform import centered_transform_mat, random_occlusion + + +NOSE_INDEX = 0 + +LEFT_EAR_INDEX = 1 +RIGHT_EAR_INDEX = 2 + +BASE_NECK_INDEX = 3 + +LEFT_FRONT_PAW_INDEX = 4 +RIGHT_FRONT_PAW_INDEX = 5 + +CENTER_SPINE_INDEX = 6 + +LEFT_REAR_PAW_INDEX = 7 +RIGHT_REAR_PAW_INDEX = 8 + +BASE_TAIL_INDEX = 9 +MID_TAIL_INDEX = 10 +TIP_TAIL_INDEX = 11 + +logger = logging.getLogger(__name__) + + +class HDF5MousePose(JointsDataset): + + def __init__(self, cfg, root, image_set, is_train, transform=None): + super().__init__(cfg, root, image_set, is_train, transform) + + self.prob_randomized_occlusion = cfg.DATASET.PROB_RANDOMIZED_OCCLUSION + self.max_occlusion_size = cfg.DATASET.MAX_OCCLUSION_SIZE + self.occlusion_opacities = cfg.DATASET.OCCLUSION_OPACITIES + self.prob_randomized_center = cfg.DATASET.PROB_RANDOMIZED_CENTER + self.jitter_center = cfg.DATASET.JITTER_CENTER + self.jitter_brightness = cfg.DATASET.JITTER_BRIGHTNESS + self.jitter_contrast = cfg.DATASET.JITTER_CONTRAST + self.jitter_saturation = cfg.DATASET.JITTER_SATURATION + + self.num_joints = 12 + + self.flip_pairs = [ + [LEFT_EAR_INDEX, RIGHT_EAR_INDEX], + [LEFT_FRONT_PAW_INDEX, RIGHT_FRONT_PAW_INDEX], + [LEFT_REAR_PAW_INDEX, RIGHT_REAR_PAW_INDEX], + ] + + self.db = self._get_db() + + def _get_db(self): + + def gen_db(): + with h5py.File(self.root, 'r') as hdf5file: + if self.image_set in hdf5file: + for name, group in hdf5file[self.image_set].items(): + if 'frames' in group and 'points' in group: + points = group['points'] + for grp_frame_index in range(points.shape[0]): + grp_frame_pts = points[grp_frame_index, ...] 
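The lines that follow derive the crop center as the midpoint of the keypoints' bounding box. In isolation, for a toy [num_joints, 2] array of (x, y) coordinates:

    import numpy as np

    grp_frame_pts = np.array([[10., 40.], [30., 20.], [50., 60.]])  # (x, y)
    max_x, max_y = np.amax(grp_frame_pts, axis=0)   # 50., 60.
    min_x, min_y = np.amin(grp_frame_pts, axis=0)   # 10., 20.
    center_xy = np.array([(max_x + min_x) / 2,      # 30.
                          (max_y + min_y) / 2])     # 40. -> crop center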
+ max_x, max_y = np.amax(grp_frame_pts, axis=0) + min_x, min_y = np.amin(grp_frame_pts, axis=0) + + # width = max_x - min_x + # height = max_y - min_y + + center_x = (max_x + min_x) / 2 + center_y = (max_y + min_y) / 2 + center_xy = np.array([center_x, center_y], dtype=np.float32) + # scale = np.array( + # [ + # width * 1.0 / self.pixel_std, + # height * 1.0 / self.pixel_std, + # ], + # dtype=np.float32) + # scale_range = 0.4 + # scale = 1 + np.random.random([2]) * scale_range - scale_range / 2 + scale = np.ones([2], dtype=np.float32) + + joints_3d = np.zeros((self.num_joints, 3), dtype=np.float) + joints_3d[:, :2] = grp_frame_pts + + yield { + 'image': (name, grp_frame_index), + 'center': center_xy, + 'scale': scale, + 'joints_3d': joints_3d, + 'joints_3d_vis': np.ones((self.num_joints, 3), dtype=np.float), + } + + return list(gen_db()) + + def __getitem__(self, idx): + db_rec = copy.deepcopy(self.db[idx]) + + filename = db_rec['filename'] if 'filename' in db_rec else '' + imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else '' + img_grp_name, img_grp_frame_index = db_rec['image'] + data_numpy = None + with h5py.File(self.root, 'r') as hdf5file: + data_numpy = hdf5file[self.image_set][img_grp_name]['frames'][img_grp_frame_index, ...] + + if data_numpy is None: + logger.error('=> fail to read {}'.format(db_rec['image'])) + raise ValueError('Fail to read {}'.format(db_rec['image'])) + + joints = db_rec['joints_3d'] + joints_vis = db_rec['joints_3d_vis'] + + c = db_rec['center'] + s = db_rec['scale'] + score = db_rec['score'] if 'score' in db_rec else 1 + r = 0 + + if self.is_train: + sf = self.scale_factor + s = s * np.clip(np.random.randn()*sf + 1, 1 - sf, 1 + sf) + r = 360 * random.random() if random.random() <= 0.8 else 0 + + if self.prob_randomized_center > 0 and random.random() <= self.prob_randomized_center: + c[0] = data_numpy.shape[1] * random.random() + c[1] = data_numpy.shape[0] * random.random() + elif self.jitter_center > 0: + c[0] += self.image_size[0] * self.jitter_center * np.random.randn() + c[1] += self.image_size[1] * self.jitter_center * np.random.randn() + + if self.flip and random.random() <= 0.5: + data_numpy = data_numpy[:, ::-1, :] + joints, joints_vis = fliplr_joints( + joints, joints_vis, data_numpy.shape[1], self.flip_pairs) + c[0] = data_numpy.shape[1] - c[0] - 1 + + trans = centered_transform_mat(c, r, s[0], self.image_size) + input = cv2.warpAffine( + data_numpy, + trans[:2, :], + (int(self.image_size[0]), int(self.image_size[1])), + flags=cv2.INTER_LINEAR) + + if self.is_train: + if self.jitter_brightness > 0 or self.jitter_contrast > 0 or self.jitter_saturation > 0: + input = to_pil_image(input) + input = ColorJitter(self.jitter_brightness, self.jitter_contrast, self.jitter_saturation)(input) + input = to_tensor(input).squeeze(0).numpy() + input = (input * 255).astype(np.uint8) + + if self.prob_randomized_occlusion > 0 and random.random() <= self.prob_randomized_occlusion: + random_occlusion(input, self.max_occlusion_size, np.random.choice(self.occlusion_opacities)) + + if self.transform: + input = self.transform(input) + + for i in range(self.num_joints): + if joints_vis[i, 0] > 0.0: + joints[i, 0:2] = affine_transform(joints[i, 0:2], trans) + + target, target_weight = self.generate_target(joints, joints_vis) + + if not torch.is_tensor(target): + target = torch.from_numpy(target) + if not torch.is_tensor(target_weight): + target_weight = torch.from_numpy(target_weight) + + meta = { + 'image': db_rec['image'], + 'filename': filename, + 'imgnum': imgnum, + 
'joints': joints, + 'joints_vis': joints_vis, + 'center': c, + 'scale': s, + 'rotation': r, + 'score': score + } + + input = torch.stack([input.squeeze(0)] * 3) + + return input, target, target_weight, meta diff --git a/lib/dataset/mpii.py b/lib/dataset/mpii.py new file mode 100644 index 0000000..c935001 --- /dev/null +++ b/lib/dataset/mpii.py @@ -0,0 +1,181 @@ +# ------------------------------------------------------------------------------ +# Copyright (c) Microsoft +# Licensed under the MIT License. +# Written by Bin Xiao (Bin.Xiao@microsoft.com) +# ------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import logging +import os +import json_tricks as json +from collections import OrderedDict + +import numpy as np +from scipy.io import loadmat, savemat + +from dataset.JointsDataset import JointsDataset + + +logger = logging.getLogger(__name__) + + +class MPIIDataset(JointsDataset): + def __init__(self, cfg, root, image_set, is_train, transform=None): + super().__init__(cfg, root, image_set, is_train, transform) + + self.num_joints = 16 + self.flip_pairs = [[0, 5], [1, 4], [2, 3], [10, 15], [11, 14], [12, 13]] + self.parent_ids = [1, 2, 6, 6, 3, 4, 6, 6, 7, 8, 11, 12, 7, 7, 13, 14] + + self.upper_body_ids = (7, 8, 9, 10, 11, 12, 13, 14, 15) + self.lower_body_ids = (0, 1, 2, 3, 4, 5, 6) + + self.db = self._get_db() + + if is_train and cfg.DATASET.SELECT_DATA: + self.db = self.select_data(self.db) + + logger.info('=> load {} samples'.format(len(self.db))) + + def _get_db(self): + # create train/val split + file_name = os.path.join( + self.root, 'annot', self.image_set+'.json' + ) + with open(file_name) as anno_file: + anno = json.load(anno_file) + + gt_db = [] + for a in anno: + image_name = a['image'] + + c = np.array(a['center'], dtype=np.float) + s = np.array([a['scale'], a['scale']], dtype=np.float) + + # Adjust center/scale slightly to avoid cropping limbs + if c[0] != -1: + c[1] = c[1] + 15 * s[1] + s = s * 1.25 + + # MPII uses matlab format, index is based 1, + # we should first convert to 0-based index + c = c - 1 + + joints_3d = np.zeros((self.num_joints, 3), dtype=np.float) + joints_3d_vis = np.zeros((self.num_joints, 3), dtype=np.float) + if self.image_set != 'test': + joints = np.array(a['joints']) + joints[:, 0:2] = joints[:, 0:2] - 1 + joints_vis = np.array(a['joints_vis']) + assert len(joints) == self.num_joints, \ + 'joint num diff: {} vs {}'.format(len(joints), + self.num_joints) + + joints_3d[:, 0:2] = joints[:, 0:2] + joints_3d_vis[:, 0] = joints_vis[:] + joints_3d_vis[:, 1] = joints_vis[:] + + image_dir = 'images.zip@' if self.data_format == 'zip' else 'images' + gt_db.append( + { + 'image': os.path.join(self.root, image_dir, image_name), + 'center': c, + 'scale': s, + 'joints_3d': joints_3d, + 'joints_3d_vis': joints_3d_vis, + 'filename': '', + 'imgnum': 0, + } + ) + + return gt_db + + def evaluate(self, cfg, preds, output_dir, *args, **kwargs): + # convert 0-based index to 1-based index + preds = preds[:, :, 0:2] + 1.0 + + if output_dir: + pred_file = os.path.join(output_dir, 'pred.mat') + savemat(pred_file, mdict={'preds': preds}) + + if 'test' in cfg.DATASET.TEST_SET: + return {'Null': 0.0}, 0.0 + + SC_BIAS = 0.6 + threshold = 0.5 + + gt_file = os.path.join(cfg.DATASET.ROOT, + 'annot', + 'gt_{}.mat'.format(cfg.DATASET.TEST_SET)) + gt_dict = loadmat(gt_file) + dataset_joints = gt_dict['dataset_joints'] + jnt_missing = 
gt_dict['jnt_missing'] + pos_gt_src = gt_dict['pos_gt_src'] + headboxes_src = gt_dict['headboxes_src'] + + pos_pred_src = np.transpose(preds, [1, 2, 0]) + + head = np.where(dataset_joints == 'head')[1][0] + lsho = np.where(dataset_joints == 'lsho')[1][0] + lelb = np.where(dataset_joints == 'lelb')[1][0] + lwri = np.where(dataset_joints == 'lwri')[1][0] + lhip = np.where(dataset_joints == 'lhip')[1][0] + lkne = np.where(dataset_joints == 'lkne')[1][0] + lank = np.where(dataset_joints == 'lank')[1][0] + + rsho = np.where(dataset_joints == 'rsho')[1][0] + relb = np.where(dataset_joints == 'relb')[1][0] + rwri = np.where(dataset_joints == 'rwri')[1][0] + rkne = np.where(dataset_joints == 'rkne')[1][0] + rank = np.where(dataset_joints == 'rank')[1][0] + rhip = np.where(dataset_joints == 'rhip')[1][0] + + jnt_visible = 1 - jnt_missing + uv_error = pos_pred_src - pos_gt_src + uv_err = np.linalg.norm(uv_error, axis=1) + headsizes = headboxes_src[1, :, :] - headboxes_src[0, :, :] + headsizes = np.linalg.norm(headsizes, axis=0) + headsizes *= SC_BIAS + scale = np.multiply(headsizes, np.ones((len(uv_err), 1))) + scaled_uv_err = np.divide(uv_err, scale) + scaled_uv_err = np.multiply(scaled_uv_err, jnt_visible) + jnt_count = np.sum(jnt_visible, axis=1) + less_than_threshold = np.multiply((scaled_uv_err <= threshold), + jnt_visible) + PCKh = np.divide(100.*np.sum(less_than_threshold, axis=1), jnt_count) + + # save + rng = np.arange(0, 0.5+0.01, 0.01) + pckAll = np.zeros((len(rng), 16)) + + for r in range(len(rng)): + threshold = rng[r] + less_than_threshold = np.multiply(scaled_uv_err <= threshold, + jnt_visible) + pckAll[r, :] = np.divide(100.*np.sum(less_than_threshold, axis=1), + jnt_count) + + PCKh = np.ma.array(PCKh, mask=False) + PCKh.mask[6:8] = True + + jnt_count = np.ma.array(jnt_count, mask=False) + jnt_count.mask[6:8] = True + jnt_ratio = jnt_count / np.sum(jnt_count).astype(np.float64) + + name_value = [ + ('Head', PCKh[head]), + ('Shoulder', 0.5 * (PCKh[lsho] + PCKh[rsho])), + ('Elbow', 0.5 * (PCKh[lelb] + PCKh[relb])), + ('Wrist', 0.5 * (PCKh[lwri] + PCKh[rwri])), + ('Hip', 0.5 * (PCKh[lhip] + PCKh[rhip])), + ('Knee', 0.5 * (PCKh[lkne] + PCKh[rkne])), + ('Ankle', 0.5 * (PCKh[lank] + PCKh[rank])), + ('Mean', np.sum(PCKh * jnt_ratio)), + ('Mean@0.1', np.sum(pckAll[11, :] * jnt_ratio)) + ] + name_value = OrderedDict(name_value) + + return name_value, name_value['Mean'] diff --git a/lib/dataset/multimousepose.py b/lib/dataset/multimousepose.py new file mode 100644 index 0000000..72b3e23 --- /dev/null +++ b/lib/dataset/multimousepose.py @@ -0,0 +1,399 @@ +import cv2 +import numpy as np +import os +import re +import skimage.io +import torch +from torch.utils.data import Dataset +from torchvision.transforms import ColorJitter +from torchvision.transforms.functional import to_pil_image, to_tensor +import xml.etree.ElementTree as ET + +from utils.transforms import affine_transform +from utils.xform import centered_transform_mat, random_occlusion + + +NOSE_INDEX = 0 + +LEFT_EAR_INDEX = 1 +RIGHT_EAR_INDEX = 2 + +BASE_NECK_INDEX = 3 + +LEFT_FRONT_PAW_INDEX = 4 +RIGHT_FRONT_PAW_INDEX = 5 + +CENTER_SPINE_INDEX = 6 + +LEFT_REAR_PAW_INDEX = 7 +RIGHT_REAR_PAW_INDEX = 8 + +BASE_TAIL_INDEX = 9 +MID_TAIL_INDEX = 10 +TIP_TAIL_INDEX = 11 + + +def parse_poses(cvat_xml_path): + root = ET.parse(cvat_xml_path) + for image_elem in root.findall('./image'): + img_name = image_elem.attrib['name'] + polyline_elems = ( + pl for pl in image_elem.findall('./polyline') + if pl.attrib['label'] == 'mouse' + ) + + 
pose_instances = [] + for polyline_elem in polyline_elems: + xy_strs = [ + xy_str.split(',') + for xy_str in polyline_elem.attrib['points'].split(';') + ] + assert len(xy_strs) + + xy_points = np.array( + [(float(x_str), float(y_str)) for x_str, y_str in xy_strs], + dtype=np.float32, + ) + #xy_points = np.transpose(xy_points) + + pose_instances.append(xy_points) + + yield { + 'image_name': img_name, + 'pose_instances': pose_instances, + } + + +def _get_bounding_box(pose_instances, selected_indexes): + all_points = np.concatenate([pose_instances[i] for i in selected_indexes]) + + min_xy = all_points.min(0) + max_xy = all_points.max(0) + + return min_xy, max_xy + + +def transform_points(xy_points, xform): + # need a row of 1's for affine transform matrix mult + xy_points_xform = np.concatenate([ + xy_points, + np.ones([1, xy_points.shape[1]], dtype=xy_points.dtype)]) + xy_points_xform = xform @ xy_points_xform + + return xy_points_xform[:2, :] + + +def _read_image(image_path): + data_numpy = skimage.io.imread(image_path, as_gray=True) * 255 + + data_numpy = data_numpy.round().astype(np.uint8) + data_numpy = data_numpy[..., np.newaxis] + + return data_numpy + + +def decompose_frame_name(frame_filename): + m = re.match(r'(.+)_([0-9]+).png', frame_filename) + return m.group(1), int(m.group(2)) + + +class MultiPoseDataset(Dataset): + + def __init__(self, cfg, image_dir, pose_labels, is_train, transform=None): + self.cfg = cfg + self.image_dir = image_dir + self.pose_labels = pose_labels + self.is_train = is_train + self.transform = transform + + self.scale_factor = cfg.DATASET.SCALE_FACTOR + self.flip = cfg.DATASET.FLIP + + self.image_size = np.array(cfg.MODEL.IMAGE_SIZE) + self.heatmap_size = np.array(cfg.MODEL.HEATMAP_SIZE) + self.sigma = cfg.MODEL.SIGMA + self.num_joints = cfg.MODEL.NUM_JOINTS + self.target_type = cfg.MODEL.TARGET_TYPE + self.model_extra = cfg.MODEL.EXTRA + + self.use_neighboring_frames = False + if 'USE_NEIGHBORING_FRAMES' in self.model_extra: + self.use_neighboring_frames = self.model_extra['USE_NEIGHBORING_FRAMES'] + + # TODO this really should be in the config since it's specific to mice + self.flip_pairs = [ + [LEFT_EAR_INDEX, RIGHT_EAR_INDEX], + [LEFT_FRONT_PAW_INDEX, RIGHT_FRONT_PAW_INDEX], + [LEFT_REAR_PAW_INDEX, RIGHT_REAR_PAW_INDEX], + ] + + self.max_instance_count = max(len(pl['pose_instances']) for pl in self.pose_labels) + + def __len__(self): + return len(self.pose_labels) + + def _gen_joint_heatmaps(self, pose_instances): + target = np.zeros( + (self.num_joints, self.heatmap_size[1], self.heatmap_size[0]), + dtype=np.float32) + + # build target heatmap where each point is the center of a 2D gaussian + if self.target_type == 'gaussian': + tmp_size = self.sigma * 3 + + # TODO can we add sub-pixel precision here? 
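+            # each keypoint of each pose instance stamps a square gaussian
+            # patch of side 6 * sigma + 1 (e.g. a 13x13 patch for sigma == 2),
+            # quantized to the heatmap stride, with a peak of 1.0 at the
+            # keypoint; the np.maximum below merges patches from overlapping
+            # instances so the target never exceeds 1.0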
+ for joint_id in range(self.num_joints): + for pose_inst in pose_instances: + feat_stride = self.image_size / self.heatmap_size + mu_x = int(pose_inst[joint_id][0] / feat_stride[0] + 0.5) + mu_y = int(pose_inst[joint_id][1] / feat_stride[1] + 0.5) + # Check that any part of the gaussian is in-bounds + ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] + br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] + if ul[0] >= self.heatmap_size[0] or ul[1] >= self.heatmap_size[1] \ + or br[0] < 0 or br[1] < 0: + # If not, just return the image as is + continue + + # # Generate gaussian + size = 2 * tmp_size + 1 + x = np.arange(0, size, 1, np.float32) + y = x[:, np.newaxis] + x0 = y0 = size // 2 + # The gaussian is not normalized, we want the center value to equal 1 + g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * self.sigma ** 2)) + + # Usable gaussian range + g_x = max(0, -ul[0]), min(br[0], self.heatmap_size[0]) - ul[0] + g_y = max(0, -ul[1]), min(br[1], self.heatmap_size[1]) - ul[1] + # Image range + img_x = max(0, ul[0]), min(br[0], self.heatmap_size[0]) + img_y = max(0, ul[1]), min(br[1], self.heatmap_size[1]) + + target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]] = np.maximum( + g[g_y[0]:g_y[1], g_x[0]:g_x[1]], + target[joint_id][img_y[0]:img_y[1], img_x[0]:img_x[1]]) + + # build target heatmap where each point is the center of a 2D exponential + # decay function + elif self.target_type == 'exp_decay': + + # for now we require image_size and heatmap_size to be the + # same, but we can change code to allow different sizes + # later if needed + assert np.all(self.image_size == self.heatmap_size) + + img_width, img_height = self.image_size + + # Each heat patch will just be a small square to save on + # compute but large enough to allow almost all decay. For + # a lambda of 1 a distance of 3 should be sufficient: (e^-3 = ~0.05). 
+ # We just need to scale this by 1/exp_decay_lambda to + # make it work for any lambda + EXP_DECAY_PATCH_SIZE_FACTOR = 4 + exp_decay_lambda = self.model_extra['EXP_DECAY_LAMBDA'] + heat_patch_size = EXP_DECAY_PATCH_SIZE_FACTOR / exp_decay_lambda + + # for each joint within a pose instance we calculate a pose + # heatmap patch which will be a 2D exponential decay and then + # apply that patch to the target heatmap + for joint_id in range(self.num_joints): + for pose_inst in pose_instances: + + mu_x = pose_inst[joint_id][0] + mu_y = pose_inst[joint_id][1] + + start_x = int(max(np.floor(mu_x - heat_patch_size), 0)) + start_y = int(max(np.floor(mu_y - heat_patch_size), 0)) + + stop_x = int(min(np.ceil(mu_x + heat_patch_size + 1), img_width)) + stop_y = int(min(np.ceil(mu_y + heat_patch_size + 1), img_height)) + + if start_x < stop_x and start_y < stop_y: + patch_width = stop_x - start_x + patch_height = stop_y - start_y + + x = np.arange(start_x, stop_x) - mu_x + y = np.arange(start_y, stop_y) - mu_y + + x_mat = np.tile(x, patch_height).reshape(patch_height, patch_width) + y_mat = np.tile(y, patch_width).reshape(patch_width, patch_height).T + + xy_mat = np.stack([x_mat, y_mat], axis=2) + dist_mat = np.linalg.norm(xy_mat, axis=2) + decay_mat = np.exp(-exp_decay_lambda * dist_mat) + + # we apply our 2D exponential decay patch to the target heatmap + # but we do it using maximum so that we get the desired result + # for overlapping patches + target[joint_id][start_y:stop_y, start_x:stop_x] = np.maximum( + decay_mat, + target[joint_id][start_y:stop_y, start_x:stop_x]) + + # build target heatmap where each point is a single pixel set to 1.0 + elif self.target_type == 'point': + + # for now we require image_size and heatmap_size to be the + # same, but we can change code to allow different sizes + # later if needed + assert np.all(self.image_size == self.heatmap_size) + + img_width, img_height = self.image_size + + # for each joint within a pose instance we set the joint x,y to 1.0 + for joint_id in range(self.num_joints): + for pose_inst in pose_instances: + + mu_x = int(round(pose_inst[joint_id][0])) + mu_y = int(round(pose_inst[joint_id][1])) + + # print(type(joint_id), type(mu_y), type(mu_x)) + # print(joint_id, mu_y, mu_x) + if 0 <= mu_x < img_width and 0 <= mu_y < img_height: + target[joint_id, mu_y, mu_x] = 1.0 + + # if we reach this else we've been given a target type that we don't + # know how to deal with + else: + raise Exception('unexpected target type: {}'.format(self.target_type)) + + return torch.tensor(target, dtype=torch.float32) + + def __getitem__(self, idx): + + pose_label = self.pose_labels[idx] + image_name = pose_label['image_name'] + pose_instances = [p.copy() for p in pose_label['pose_instances']] + num_instances = len(pose_instances) + + image_size = np.array(self.cfg.MODEL.IMAGE_SIZE, dtype=np.uint32) + + image_path = os.path.join(self.image_dir, image_name) + data_numpy = _read_image(image_path) + + if self.use_neighboring_frames: + vid_fragment, frame_index = decompose_frame_name(image_path) + + prev_frame_path = '{}_{}.png'.format(vid_fragment, frame_index - 1) + prev_data_numpy = _read_image(prev_frame_path) + + next_frame_path = '{}_{}.png'.format(vid_fragment, frame_index + 1) + next_data_numpy = _read_image(next_frame_path) + + data_numpy = np.concatenate( + [prev_data_numpy, data_numpy, next_data_numpy], + axis=-1) + + # keep randomly adding pose instances (without replacement) until the + # bounding box is larger than our cropped IMAGE_SIZE. 
This is done so
+        # that the cropped image is guaranteed to contain at least one
+        # instance
+        shuff_instances = np.arange(num_instances)
+        np.random.shuffle(shuff_instances)
+        min_xy, max_xy = _get_bounding_box(pose_instances, shuff_instances[:1])
+        for i in range(1, num_instances):
+            curr_min_xy, curr_max_xy = _get_bounding_box(
+                pose_instances,
+                shuff_instances[:i+1])
+
+            xy_diff = curr_max_xy - curr_min_xy
+            if np.any(xy_diff > image_size):
+                # adding this instance would put us out of bounds
+                break
+            else:
+                # we're still in bounds
+                min_xy = curr_min_xy
+                max_xy = curr_max_xy
+
+        center_xy = (min_xy + max_xy) / 2.0
+
+        scale = self.cfg.DATASET.SCALE
+        rot_deg = 0
+
+        if self.is_train:
+            sf = self.cfg.DATASET.SCALE_FACTOR
+            scale *= np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
+            rot_deg = 360 * np.random.random() if np.random.random() <= 0.8 else 0
+
+            prob_randomized_center = self.cfg.DATASET.PROB_RANDOMIZED_CENTER
+            jitter_center = self.cfg.DATASET.JITTER_CENTER
+            if prob_randomized_center > 0 and np.random.random() <= prob_randomized_center:
+                # image width is shape[1] and height is shape[0]
+                center_xy[0] = data_numpy.shape[1] * np.random.random()
+                center_xy[1] = data_numpy.shape[0] * np.random.random()
+            elif jitter_center > 0:
+                center_xy[0] += image_size[0] * jitter_center * np.random.randn()
+                center_xy[1] += image_size[1] * jitter_center * np.random.randn()
+
+        if self.cfg.DATASET.FLIP and np.random.random() <= 0.5:
+            # reflect the pixels along the X axis
+            data_numpy = data_numpy[:, ::-1, ...]
+
+            # center X needs to be adjusted
+            center_xy[0] = (data_numpy.shape[1] - 1) - center_xy[0]
+
+            for pose_instance in pose_instances:
+                # reflect the X coords of pose
+                pose_instance[:, 0] = (data_numpy.shape[1] - 1) - pose_instance[:, 0]
+
+                for i1, i2 in self.flip_pairs:
+                    # left point is now right and right is now left so these
+                    # keypoints need to be swapped
+                    pose_instance[[i1, i2], :] = pose_instance[[i2, i1], :]
+
+        trans = centered_transform_mat(center_xy, rot_deg, scale, image_size)
+        img = cv2.warpAffine(
+            data_numpy,
+            trans[:2, :],
+            (image_size[0], image_size[1]),
+            flags=cv2.INTER_LINEAR)
+
+        # for training data we throw in some image augmentation:
+        # brightness, contrast and occlusion
+        if self.is_train:
+            jitter_brightness = self.cfg.DATASET.JITTER_BRIGHTNESS
+            jitter_contrast = self.cfg.DATASET.JITTER_CONTRAST
+            if jitter_brightness > 0 or jitter_contrast > 0:
+                img = to_pil_image(img)
+                img = ColorJitter(jitter_brightness, jitter_contrast)(img)
+                img = to_tensor(img).numpy()
+                img = (img * 255).astype(np.uint8)
+
+            prob_randomized_occlusion = self.cfg.DATASET.PROB_RANDOMIZED_OCCLUSION
+            max_occlusion_size = self.cfg.DATASET.MAX_OCCLUSION_SIZE
+            occlusion_opacities = self.cfg.DATASET.OCCLUSION_OPACITIES
+            if prob_randomized_occlusion > 0 and np.random.random() <= prob_randomized_occlusion:
+                random_occlusion(img, max_occlusion_size, np.random.choice(occlusion_opacities))
+        else:
+            if img.ndim == 3:
+                img = np.stack([img[:, :, i] for i in range(img.shape[2])])
+
+        img = torch.from_numpy(img).to(torch.float32) / 255
+        if img.dim() == 2:
+            img = img.unsqueeze(0)
+
+        # if we were provided an image augmentation in the constructor we use it here
+        if self.transform:
+            img = self.transform(img)
+
+        # Batches need to allow a varying number of pose instances per sample,
+        # but pytorch doesn't support jagged/ragged tensors. Instead
+        # we pack the poses into a (possibly) oversized tensor and include the
+        # instance count in the return value. This does waste some space but
+        # in the scheme of things it's negligible.
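+        # note: rows of the padded tensor beyond instance_count come from
+        # torch.empty and are uninitialized, so consumers must slice with
+        # pose_instances[:instance_count] before using the values; e.g. a
+        # sample with instance_count == 2 and max_instance_count == 5 only
+        # holds valid poses in rows 0 and 1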
+ pose_instances = [ + transform_points(p.transpose(), trans).transpose() + for p in pose_instances + ] + pose_instances_tensor = torch.empty( + self.max_instance_count, self.num_joints, 2, + dtype=torch.float32) + for i, pose_instance in enumerate(pose_instances): + pose_instances_tensor[i, ...] = torch.from_numpy(pose_instance) + + joint_heatmaps = self._gen_joint_heatmaps(pose_instances) + + return { + 'image': img, + 'joint_heatmaps': joint_heatmaps, + 'pose_instances': pose_instances_tensor, + 'instance_count': len(pose_instances) + } diff --git a/lib/dataset/simplepointdata.py b/lib/dataset/simplepointdata.py new file mode 100644 index 0000000..25e2a49 --- /dev/null +++ b/lib/dataset/simplepointdata.py @@ -0,0 +1,283 @@ +import cv2 +import itertools +import numpy as np +import os +import re +import skimage.io +import torch +from torch.utils.data import Dataset +from torchvision.transforms import ColorJitter +from torchvision.transforms.functional import to_pil_image, to_tensor +import xml.etree.ElementTree as ET + +from utils.transforms import affine_transform +from utils.xform import centered_transform_mat, random_occlusion + + +def parse_point_labels(cvat_xml_path, label_attr_name): + root = ET.parse(cvat_xml_path) + for image_elem in root.findall('./image'): + img_name = image_elem.attrib['name'] + points_elems = itertools.chain( + ( + pl for pl in image_elem.findall('./points') + if pl.attrib['label'] == label_attr_name + ), + ( + pl for pl in image_elem.findall('./polyline') + if pl.attrib['label'] == label_attr_name + ), + ) + + xy_strs = [] + for points_elem in points_elems: + xy_strs += [ + xy_str.split(',') + for xy_str in points_elem.attrib['points'].split(';') + ] + + assert len(xy_strs) + + point_xy = np.array(xy_strs, dtype=np.float) + yield { + 'image_name': img_name, + 'point_xy': point_xy, + } + + +def transform_points(xy_points, xform): + # need a row of 1's for affine transform matrix mult + xy_points_xform = np.concatenate([ + xy_points, + np.ones([1, xy_points.shape[1]], dtype=xy_points.dtype)]) + xy_points_xform = xform @ xy_points_xform + + return xy_points_xform[:2, :] + + +def _read_image(image_path): + data_numpy = skimage.io.imread(image_path, as_gray=True) * 255 + + data_numpy = data_numpy.round().astype(np.uint8) + data_numpy = data_numpy[..., np.newaxis] + + return data_numpy + + +class SimplePointDataset(Dataset): + + def __init__(self, cfg, image_dir, point_labels, is_train, transform=None): + self.cfg = cfg + self.image_dir = image_dir + self.point_labels = point_labels + self.is_train = is_train + self.transform = transform + + self.scale_factor = cfg.DATASET.SCALE_FACTOR + self.flip = cfg.DATASET.FLIP + + self.image_size = np.array(cfg.MODEL.IMAGE_SIZE) + self.heatmap_size = np.array(cfg.MODEL.HEATMAP_SIZE) + self.sigma = cfg.MODEL.SIGMA + self.target_type = cfg.MODEL.TARGET_TYPE + self.model_extra = cfg.MODEL.EXTRA + + def __len__(self): + return len(self.point_labels) + + def _gen_heatmap(self, point_xy): + target = np.zeros( + (1, self.heatmap_size[1], self.heatmap_size[0]), + dtype=np.float32) + + # build target heatmap where each point is the center of a 2D gaussian + if self.target_type == 'gaussian': + tmp_size = self.sigma * 3 + + # # TODO can we add sub-pixel precision here? 
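+            # same gaussian stamping as the pose datasets, collapsed into a
+            # single heatmap channel since these points carry no joint
+            # identity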
+ for curr_xy in point_xy: + feat_stride = self.image_size / self.heatmap_size + mu_x = int(curr_xy[0] / feat_stride[0] + 0.5) + mu_y = int(curr_xy[1] / feat_stride[1] + 0.5) + # Check that any part of the gaussian is in-bounds + ul = [int(mu_x - tmp_size), int(mu_y - tmp_size)] + br = [int(mu_x + tmp_size + 1), int(mu_y + tmp_size + 1)] + if ul[0] >= self.heatmap_size[0] or ul[1] >= self.heatmap_size[1] \ + or br[0] < 0 or br[1] < 0: + # If not, just return the image as is + continue + + # # Generate gaussian + size = 2 * tmp_size + 1 + x = np.arange(0, size, 1, np.float32) + y = x[:, np.newaxis] + x0 = y0 = size // 2 + # The gaussian is not normalized, we want the center value to equal 1 + g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * self.sigma ** 2)) + + # Usable gaussian range + g_x = max(0, -ul[0]), min(br[0], self.heatmap_size[0]) - ul[0] + g_y = max(0, -ul[1]), min(br[1], self.heatmap_size[1]) - ul[1] + # Image range + img_x = max(0, ul[0]), min(br[0], self.heatmap_size[0]) + img_y = max(0, ul[1]), min(br[1], self.heatmap_size[1]) + + target[0, img_y[0]:img_y[1], img_x[0]:img_x[1]] = np.maximum( + g[g_y[0]:g_y[1], g_x[0]:g_x[1]], + target[0, img_y[0]:img_y[1], img_x[0]:img_x[1]]) + + # build target heatmap where each point is the center of a 2D exponential + # decay function + elif self.target_type == 'exp_decay': + + # for now we require image_size and heatmap_size to be the + # same, but we can change code to allow different sizes + # later if needed + assert np.all(self.image_size == self.heatmap_size) + + img_width, img_height = self.image_size + + # Each heat patch will just be a small square to save on + # compute but large enough to allow almost all decay. For + # a lambda of 1 a distance of 3 should be sufficient: (e^-3 = ~0.05). 
+ # We just need to scale this by 1/exp_decay_lambda to + # make it work for any lambda + EXP_DECAY_PATCH_SIZE_FACTOR = 4 + exp_decay_lambda = self.model_extra['EXP_DECAY_LAMBDA'] + heat_patch_size = EXP_DECAY_PATCH_SIZE_FACTOR / exp_decay_lambda + + for curr_xy in point_xy: + + mu_x = curr_xy[0] + mu_y = curr_xy[1] + + start_x = int(max(np.floor(mu_x - heat_patch_size), 0)) + start_y = int(max(np.floor(mu_y - heat_patch_size), 0)) + + stop_x = int(min(np.ceil(mu_x + heat_patch_size + 1), img_width)) + stop_y = int(min(np.ceil(mu_y + heat_patch_size + 1), img_height)) + + if start_x < stop_x and start_y < stop_y: + patch_width = stop_x - start_x + patch_height = stop_y - start_y + + x = np.arange(start_x, stop_x) - mu_x + y = np.arange(start_y, stop_y) - mu_y + + x_mat = np.tile(x, patch_height).reshape(patch_height, patch_width) + y_mat = np.tile(y, patch_width).reshape(patch_width, patch_height).T + + xy_mat = np.stack([x_mat, y_mat], axis=2) + dist_mat = np.linalg.norm(xy_mat, axis=2) + decay_mat = np.exp(-exp_decay_lambda * dist_mat) + + # we apply our 2D exponential decay patch to the target heatmap + # but we do it using maximum so that we get the desired result + # for overlapping patches + target[0, start_y:stop_y, start_x:stop_x] = np.maximum( + decay_mat, + target[0, start_y:stop_y, start_x:stop_x]) + + # build target heatmap where each point is a single pixel set to 1.0 + elif self.target_type == 'point': + + # for now we require image_size and heatmap_size to be the + # same, but we can change code to allow different sizes + # later if needed + assert np.all(self.image_size == self.heatmap_size) + + img_width, img_height = self.image_size + + for curr_xy in point_xy: + + mu_x = int(round(curr_xy[0])) + mu_y = int(round(curr_xy[1])) + + # print(type(joint_id), type(mu_y), type(mu_x)) + # print(joint_id, mu_y, mu_x) + if 0 <= mu_x < img_width and 0 <= mu_y < img_height: + target[0, mu_y, mu_x] = 1.0 + + # if we reach this else we've been given a target type that we don't + # know how to deal with + else: + raise Exception('unexpected target type: {}'.format(self.target_type)) + + return torch.tensor(target, dtype=torch.float32) + + def __getitem__(self, idx): + + point_label = self.point_labels[idx] + image_name = point_label['image_name'] + point_xy = point_label['point_xy'].copy() + + image_size = np.array(self.cfg.MODEL.IMAGE_SIZE, dtype=np.uint32) + + image_path = os.path.join(self.image_dir, image_name) + data_numpy = _read_image(image_path) + + # pick a random point between the min and max points for + # the center_xy + min_xy = point_xy.min(0) + max_xy = point_xy.max(0) + diff_xy = max_xy - min_xy + center_xy = min_xy + diff_xy * np.random.rand(2) + + scale = self.cfg.DATASET.SCALE + rot_deg = 0 + + if self.is_train: + sf = self.cfg.DATASET.SCALE_FACTOR + scale *= np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf) + rot_deg = 360 * np.random.random() if np.random.random() <= 0.8 else 0 + + if self.cfg.DATASET.FLIP and np.random.random() <= 0.5: + # reflect the pixels along the X axis + data_numpy = data_numpy[:, ::-1, ...] 
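+            # note that there are no left/right flip pairs to swap here:
+            # unlike the pose datasets, the points this dataset handles
+            # (e.g. fecal boli) carry no left/right identity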
+ + # center X needs to be adjusted along with all of the Xs in point_xy + center_xy[0] = (data_numpy.shape[1] - 1) - center_xy[0] + point_xy[:, 0] = (data_numpy.shape[1] - 1) - point_xy[:, 0] + + trans = centered_transform_mat(center_xy, rot_deg, scale, image_size) + img = cv2.warpAffine( + data_numpy, + trans[:2, :], + (image_size[0], image_size[1]), + flags=cv2.INTER_LINEAR) + + # for training data we throw in some image augmentation: + # brightness, contrast + if self.is_train: + jitter_brightness = self.cfg.DATASET.JITTER_BRIGHTNESS + jitter_contrast = self.cfg.DATASET.JITTER_CONTRAST + if jitter_brightness > 0 or jitter_contrast > 0: + img = to_pil_image(img) + img = ColorJitter(jitter_brightness, jitter_contrast)(img) + img = to_tensor(img).numpy() + img = (img * 255).astype(np.uint8) + + prob_randomized_occlusion = self.cfg.DATASET.PROB_RANDOMIZED_OCCLUSION + max_occlusion_size = self.cfg.DATASET.MAX_OCCLUSION_SIZE + occlusion_opacities = self.cfg.DATASET.OCCLUSION_OPACITIES + if prob_randomized_occlusion > 0 and np.random.random() <= prob_randomized_occlusion: + random_occlusion(img, max_occlusion_size, np.random.choice(occlusion_opacities)) + else: + if img.ndim == 3: + img = np.stack([img[:, :, i] for i in range(img.shape[2])]) + + img = torch.from_numpy(img).to(torch.float32) / 255 + if img.dim() == 2: + img = img.unsqueeze(0) + + # if we were provided an image augmentation in the constructor we use it here + if self.transform: + img = self.transform(img) + + point_xy = transform_points(point_xy.transpose(), trans).transpose() + heatmap = self._gen_heatmap(point_xy) + + return { + 'image': img, + 'heatmap': heatmap, + } diff --git a/lib/models/__init__.py b/lib/models/__init__.py new file mode 100644 index 0000000..e3b7f1a --- /dev/null +++ b/lib/models/__init__.py @@ -0,0 +1,16 @@ +# ------------------------------------------------------------------------------ +# Copyright (c) Microsoft +# Licensed under the MIT License. +# Written by Bin Xiao (Bin.Xiao@microsoft.com) +# ------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import models.pose_resnet +import models.pose_hrnet diff --git a/lib/models/pose_hrnet.py b/lib/models/pose_hrnet.py new file mode 100644 index 0000000..69b7c96 --- /dev/null +++ b/lib/models/pose_hrnet.py @@ -0,0 +1,639 @@ +# ------------------------------------------------------------------------------ +# Copyright (c) Microsoft +# Licensed under the MIT License. 
+# Written by Bin Xiao (Bin.Xiao@microsoft.com) +# ------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import logging + +import torch +import torch.nn as nn + + +BN_MOMENTUM = 0.1 +logger = logging.getLogger(__name__) + + +def conv3x3(in_planes, out_planes, stride=1, padding_mode='zeros'): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False, padding_mode=padding_mode) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None, padding_mode='zeros'): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride, padding_mode=padding_mode) + self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes, padding_mode=padding_mode) + self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None, padding_mode='zeros'): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, padding_mode=padding_mode, bias=False) + self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) + self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, + bias=False) + self.bn3 = nn.BatchNorm2d(planes * self.expansion, + momentum=BN_MOMENTUM) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class HighResolutionModule(nn.Module): + def __init__(self, num_branches, blocks, num_blocks, num_inchannels, + num_channels, fuse_method, multi_scale_output=True, padding_mode='zeros'): + super(HighResolutionModule, self).__init__() + self._check_branches( + num_branches, blocks, num_blocks, num_inchannels, num_channels) + + self.padding_mode = padding_mode + + self.num_inchannels = num_inchannels + self.fuse_method = fuse_method + self.num_branches = num_branches + + self.multi_scale_output = multi_scale_output + + self.branches = self._make_branches( + num_branches, blocks, num_blocks, num_channels) + self.fuse_layers = self._make_fuse_layers() + self.relu = nn.ReLU(True) + + def _check_branches(self, num_branches, blocks, num_blocks, + num_inchannels, num_channels): + if num_branches != len(num_blocks): + error_msg = 'NUM_BRANCHES({}) <> NUM_BLOCKS({})'.format( + num_branches, len(num_blocks)) + logger.error(error_msg) + raise ValueError(error_msg) + + if num_branches != len(num_channels): + error_msg = 
'NUM_BRANCHES({}) <> NUM_CHANNELS({})'.format( + num_branches, len(num_channels)) + logger.error(error_msg) + raise ValueError(error_msg) + + if num_branches != len(num_inchannels): + error_msg = 'NUM_BRANCHES({}) <> NUM_INCHANNELS({})'.format( + num_branches, len(num_inchannels)) + logger.error(error_msg) + raise ValueError(error_msg) + + def _make_one_branch(self, branch_index, block, num_blocks, num_channels, + stride=1): + downsample = None + if stride != 1 or \ + self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + self.num_inchannels[branch_index], + num_channels[branch_index] * block.expansion, + kernel_size=1, stride=stride, bias=False + ), + nn.BatchNorm2d( + num_channels[branch_index] * block.expansion, + momentum=BN_MOMENTUM + ), + ) + + layers = [] + layers.append( + block( + self.num_inchannels[branch_index], + num_channels[branch_index], + stride, + downsample, + padding_mode=self.padding_mode, + ) + ) + self.num_inchannels[branch_index] = \ + num_channels[branch_index] * block.expansion + for i in range(1, num_blocks[branch_index]): + layers.append( + block( + self.num_inchannels[branch_index], + num_channels[branch_index], + padding_mode=self.padding_mode + ) + ) + + return nn.Sequential(*layers) + + def _make_branches(self, num_branches, block, num_blocks, num_channels): + branches = [] + + for i in range(num_branches): + branches.append( + self._make_one_branch(i, block, num_blocks, num_channels) + ) + + return nn.ModuleList(branches) + + def _make_fuse_layers(self): + if self.num_branches == 1: + return None + + num_branches = self.num_branches + num_inchannels = self.num_inchannels + fuse_layers = [] + for i in range(num_branches if self.multi_scale_output else 1): + fuse_layer = [] + for j in range(num_branches): + if j > i: + fuse_layer.append( + nn.Sequential( + nn.Conv2d( + num_inchannels[j], + num_inchannels[i], + 1, 1, 0, bias=False + ), + nn.BatchNorm2d(num_inchannels[i]), + nn.Upsample(scale_factor=2**(j-i), mode='nearest') + ) + ) + elif j == i: + fuse_layer.append(None) + else: + conv3x3s = [] + for k in range(i-j): + if k == i - j - 1: + num_outchannels_conv3x3 = num_inchannels[i] + conv3x3s.append( + nn.Sequential( + nn.Conv2d( + num_inchannels[j], + num_outchannels_conv3x3, + 3, 2, 1, bias=False, padding_mode=self.padding_mode + ), + nn.BatchNorm2d(num_outchannels_conv3x3) + ) + ) + else: + num_outchannels_conv3x3 = num_inchannels[j] + conv3x3s.append( + nn.Sequential( + nn.Conv2d( + num_inchannels[j], + num_outchannels_conv3x3, + 3, 2, 1, bias=False, padding_mode=self.padding_mode + ), + nn.BatchNorm2d(num_outchannels_conv3x3), + nn.ReLU(True) + ) + ) + fuse_layer.append(nn.Sequential(*conv3x3s)) + fuse_layers.append(nn.ModuleList(fuse_layer)) + + return nn.ModuleList(fuse_layers) + + def get_num_inchannels(self): + return self.num_inchannels + + def forward(self, x): + if self.num_branches == 1: + return [self.branches[0](x[0])] + + for i in range(self.num_branches): + x[i] = self.branches[i](x[i]) + + x_fuse = [] + + for i in range(len(self.fuse_layers)): + y = x[0] if i == 0 else self.fuse_layers[i][0](x[0]) + for j in range(1, self.num_branches): + if i == j: + y = y + x[j] + else: + y = y + self.fuse_layers[i][j](x[j]) + x_fuse.append(self.relu(y)) + + return x_fuse + + +blocks_dict = { + 'BASIC': BasicBlock, + 'BOTTLENECK': Bottleneck +} + + +class PoseHighResolutionNet(nn.Module): + + def __init__(self, cfg, **kwargs): + # self.in_out_ratio = cfg['MODEL']['IMAGE_SIZE'][0] // 
cfg['MODEL']['HEATMAP_SIZE'][0] + # assert self.in_out_ratio == 4 or self.in_out_ratio == 1 + + self.inplanes = 64 + extra = cfg.MODEL.EXTRA + super(PoseHighResolutionNet, self).__init__() + + self.padding_mode = 'zeros' + if 'CONV_PADDING_MODE' in extra: + self.padding_mode = extra['CONV_PADDING_MODE'] + + # stem net + self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, + bias=False) + self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) + self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1, + padding_mode=self.padding_mode, bias=False) + self.bn2 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) + self.relu = nn.ReLU(inplace=True) + self.layer1 = self._make_layer(Bottleneck, 64, 4) + + self.stage2_cfg = extra['STAGE2'] + num_channels = self.stage2_cfg['NUM_CHANNELS'] + block = blocks_dict[self.stage2_cfg['BLOCK']] + num_channels = [ + num_channels[i] * block.expansion for i in range(len(num_channels)) + ] + self.transition1 = self._make_transition_layer([256], num_channels) + self.stage2, pre_stage_channels = self._make_stage( + self.stage2_cfg, num_channels) + + self.stage3_cfg = extra['STAGE3'] + num_channels = self.stage3_cfg['NUM_CHANNELS'] + block = blocks_dict[self.stage3_cfg['BLOCK']] + num_channels = [ + num_channels[i] * block.expansion for i in range(len(num_channels)) + ] + self.transition2 = self._make_transition_layer( + pre_stage_channels, num_channels) + self.stage3, pre_stage_channels = self._make_stage( + self.stage3_cfg, num_channels) + + self.stage4_cfg = extra['STAGE4'] + num_channels = self.stage4_cfg['NUM_CHANNELS'] + block = blocks_dict[self.stage4_cfg['BLOCK']] + num_channels = [ + num_channels[i] * block.expansion for i in range(len(num_channels)) + ] + self.transition3 = self._make_transition_layer( + pre_stage_channels, num_channels) + self.stage4, pre_stage_channels = self._make_stage( + self.stage4_cfg, num_channels, multi_scale_output=False) + + self.head_arch = 'SIMPLE_CONV' + if 'HEAD_ARCH' in extra: + self.head_arch = extra['HEAD_ARCH'] + + out_channels = cfg.MODEL.NUM_JOINTS + if 'OUTPUT_CHANNELS_PER_JOINT' in extra: + out_channels *= extra['OUTPUT_CHANNELS_PER_JOINT'] + + # if self.in_out_ratio == 4: + if self.head_arch == 'SIMPLE_CONV': + self.final_layer = nn.Conv2d( + in_channels=pre_stage_channels[0], + out_channels=out_channels, + kernel_size=extra.FINAL_CONV_KERNEL, + stride=1, + padding=1 if extra.FINAL_CONV_KERNEL == 3 else 0 + ) + # elif self.in_out_ratio == 1: + elif self.head_arch == 'CONV_TRANS_UPSCALE_5x5': + half_chan_diff = (pre_stage_channels[0] - out_channels) // 2 + convtrans1_chans = pre_stage_channels[0] - half_chan_diff + self.convtrans1 = nn.ConvTranspose2d( + in_channels=pre_stage_channels[0], + out_channels=convtrans1_chans, + kernel_size=5, + stride=2, + padding=2, + output_padding=1, + ) + self.bn3 = nn.BatchNorm2d(convtrans1_chans, momentum=BN_MOMENTUM) + self.convtrans2 = nn.ConvTranspose2d( + in_channels=convtrans1_chans, + out_channels=out_channels, + kernel_size=5, + stride=2, + padding=2, + output_padding=1, + ) + elif self.head_arch == 'CONV_TRANS_UPSCALE_5x5_EXTRA_CONVS': + half_chan_diff = (pre_stage_channels[0] - out_channels) // 2 + convtrans1_chans = pre_stage_channels[0] - half_chan_diff + self.convtrans1 = nn.ConvTranspose2d( + in_channels=pre_stage_channels[0], + out_channels=convtrans1_chans, + kernel_size=5, + stride=2, + padding=2, + output_padding=1, + ) + self.bn3 = nn.BatchNorm2d(convtrans1_chans, momentum=BN_MOMENTUM) + self.conv3 = nn.Conv2d( + in_channels=convtrans1_chans, + 
out_channels=convtrans1_chans, + kernel_size=5, + padding=2, + padding_mode=self.padding_mode, + ) + self.bn4 = nn.BatchNorm2d(convtrans1_chans, momentum=BN_MOMENTUM) + self.convtrans2 = nn.ConvTranspose2d( + in_channels=convtrans1_chans, + out_channels=out_channels, + kernel_size=5, + stride=2, + padding=2, + output_padding=1, + ) + self.bn5 = nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM) + self.conv4 = nn.Conv2d( + in_channels=out_channels, + out_channels=out_channels, + kernel_size=5, + padding=2, + padding_mode=self.padding_mode, + ) + elif self.head_arch == 'CONV_TRANS_UPSCALE_3x3': + half_chan_diff = (pre_stage_channels[0] - out_channels) // 2 + convtrans1_chans = pre_stage_channels[0] - half_chan_diff + self.convtrans1 = nn.ConvTranspose2d( + in_channels=pre_stage_channels[0], + out_channels=convtrans1_chans, + kernel_size=3, + stride=2, + padding=1, + output_padding=1, + ) + self.bn3 = nn.BatchNorm2d(convtrans1_chans, momentum=BN_MOMENTUM) + self.convtrans2 = nn.ConvTranspose2d( + in_channels=convtrans1_chans, + out_channels=out_channels, + kernel_size=3, + stride=2, + padding=1, + output_padding=1, + ) + else: + raise Exception('unexpected HEAD_ARCH of {}'.format(self.head_arch)) + + self.pretrained_layers = extra['PRETRAINED_LAYERS'] + if 'FROZEN_LAYERS' in extra: + self.frozen_layers = extra['FROZEN_LAYERS'] + else: + self.frozen_layers = [] + + def _make_transition_layer( + self, num_channels_pre_layer, num_channels_cur_layer): + num_branches_cur = len(num_channels_cur_layer) + num_branches_pre = len(num_channels_pre_layer) + + transition_layers = [] + for i in range(num_branches_cur): + if i < num_branches_pre: + if num_channels_cur_layer[i] != num_channels_pre_layer[i]: + transition_layers.append( + nn.Sequential( + nn.Conv2d( + num_channels_pre_layer[i], + num_channels_cur_layer[i], + 3, 1, 1, bias=False, + padding_mode=self.padding_mode, + ), + nn.BatchNorm2d(num_channels_cur_layer[i]), + nn.ReLU(inplace=True) + ) + ) + else: + transition_layers.append(None) + else: + conv3x3s = [] + for j in range(i+1-num_branches_pre): + inchannels = num_channels_pre_layer[-1] + outchannels = num_channels_cur_layer[i] \ + if j == i-num_branches_pre else inchannels + conv3x3s.append( + nn.Sequential( + nn.Conv2d( + inchannels, outchannels, 3, 2, 1, bias=False, + padding_mode=self.padding_mode, + ), + nn.BatchNorm2d(outchannels), + nn.ReLU(inplace=True) + ) + ) + transition_layers.append(nn.Sequential(*conv3x3s)) + + return nn.ModuleList(transition_layers) + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d( + self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False + ), + nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), + ) + + layers = [] + layers.append(block( + self.inplanes, planes, stride, downsample, + padding_mode=self.padding_mode)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes, padding_mode=self.padding_mode)) + + return nn.Sequential(*layers) + + def _make_stage(self, layer_config, num_inchannels, + multi_scale_output=True): + num_modules = layer_config['NUM_MODULES'] + num_branches = layer_config['NUM_BRANCHES'] + num_blocks = layer_config['NUM_BLOCKS'] + num_channels = layer_config['NUM_CHANNELS'] + block = blocks_dict[layer_config['BLOCK']] + fuse_method = layer_config['FUSE_METHOD'] + + modules = [] + for i in range(num_modules): 
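+            # every module except the last one always emits all branch
+            # resolutions; when the stage is built with
+            # multi_scale_output=False only the final module collapses its
+            # output to the highest-resolution branch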
+ # multi_scale_output is only used last module + if not multi_scale_output and i == num_modules - 1: + reset_multi_scale_output = False + else: + reset_multi_scale_output = True + + modules.append( + HighResolutionModule( + num_branches, + block, + num_blocks, + num_inchannels, + num_channels, + fuse_method, + reset_multi_scale_output, + padding_mode=self.padding_mode, + ) + ) + num_inchannels = modules[-1].get_num_inchannels() + + return nn.Sequential(*modules), num_inchannels + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.bn2(x) + x = self.relu(x) + x = self.layer1(x) + + x_list = [] + for i in range(self.stage2_cfg['NUM_BRANCHES']): + if self.transition1[i] is not None: + x_list.append(self.transition1[i](x)) + else: + x_list.append(x) + y_list = self.stage2(x_list) + + x_list = [] + for i in range(self.stage3_cfg['NUM_BRANCHES']): + if self.transition2[i] is not None: + x_list.append(self.transition2[i](y_list[-1])) + else: + x_list.append(y_list[i]) + y_list = self.stage3(x_list) + + x_list = [] + for i in range(self.stage4_cfg['NUM_BRANCHES']): + if self.transition3[i] is not None: + x_list.append(self.transition3[i](y_list[-1])) + else: + x_list.append(y_list[i]) + y_list = self.stage4(x_list) + + # if self.in_out_ratio == 4: + if self.head_arch == 'SIMPLE_CONV': + x = self.final_layer(y_list[0]) + elif self.head_arch in ('CONV_TRANS_UPSCALE_5x5', 'CONV_TRANS_UPSCALE_3x3'): + x = self.convtrans1(y_list[0]) + x = self.bn3(x) + x = self.relu(x) + + x = self.convtrans2(x) + elif self.head_arch == 'CONV_TRANS_UPSCALE_5x5_EXTRA_CONVS': + x = self.convtrans1(y_list[0]) + x = self.bn3(x) + x = self.relu(x) + + x = self.conv3(x) + x = self.bn4(x) + x = self.relu(x) + + x = self.convtrans2(x) + x = self.bn5(x) + x = self.relu(x) + + x = self.conv4(x) + else: + raise Exception('unexpected HEAD_ARCH of {}'.format(self.head_arch)) + + return x + + def init_weights(self, pretrained=''): + logger.info('=> init weights from normal distribution') + for m in self.modules(): + if isinstance(m, nn.Conv2d): + # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + nn.init.normal_(m.weight, std=0.001) + for name, _ in m.named_parameters(): + if name in ['bias']: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.ConvTranspose2d): + nn.init.normal_(m.weight, std=0.001) + for name, _ in m.named_parameters(): + if name in ['bias']: + nn.init.constant_(m.bias, 0) + + if os.path.isfile(pretrained): + pretrained_state_dict = torch.load(pretrained) + logger.info('=> loading pretrained model {}'.format(pretrained)) + + need_init_state_dict = {} + for name, m in pretrained_state_dict.items(): + if name.split('.')[0] in self.pretrained_layers \ + or self.pretrained_layers[0] is '*': + need_init_state_dict[name] = m + self.load_state_dict(need_init_state_dict, strict=False) + elif pretrained: + logger.error('=> please download pre-trained models first!') + raise ValueError('{} is not exist!'.format(pretrained)) + + if self.frozen_layers: + for name, param in self.named_parameters(): + if name.split('.')[0] in self.frozen_layers: + param.requires_grad = False + + +def get_pose_net(cfg, is_train, **kwargs): + model = PoseHighResolutionNet(cfg, **kwargs) + + if is_train and cfg.MODEL.INIT_WEIGHTS: + model.init_weights(cfg.MODEL.PRETRAINED) + + return model diff --git a/lib/models/pose_resnet.py b/lib/models/pose_resnet.py new 
file mode 100644 index 0000000..f264dee --- /dev/null +++ b/lib/models/pose_resnet.py @@ -0,0 +1,271 @@ +# ------------------------------------------------------------------------------ +# Copyright (c) Microsoft +# Licensed under the MIT License. +# Written by Bin Xiao (Bin.Xiao@microsoft.com) +# ------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import logging + +import torch +import torch.nn as nn + + +BN_MOMENTUM = 0.1 +logger = logging.getLogger(__name__) + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d( + in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False + ) + + +class BasicBlock(nn.Module): + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) + self.relu = nn.ReLU(inplace=True) + self.conv2 = conv3x3(planes, planes) + self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) + self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, + padding=1, bias=False) + self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM) + self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, + bias=False) + self.bn3 = nn.BatchNorm2d(planes * self.expansion, + momentum=BN_MOMENTUM) + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class PoseResNet(nn.Module): + + def __init__(self, block, layers, cfg, **kwargs): + self.inplanes = 64 + extra = cfg.MODEL.EXTRA + self.deconv_with_bias = extra.DECONV_WITH_BIAS + + super(PoseResNet, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM) + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + + # used for deconv layers + self.deconv_layers = self._make_deconv_layer( + extra.NUM_DECONV_LAYERS, + extra.NUM_DECONV_FILTERS, + extra.NUM_DECONV_KERNELS, + ) + + self.final_layer = nn.Conv2d( + in_channels=extra.NUM_DECONV_FILTERS[-1], + 
out_channels=cfg.MODEL.NUM_JOINTS, + kernel_size=extra.FINAL_CONV_KERNEL, + stride=1, + padding=1 if extra.FINAL_CONV_KERNEL == 3 else 0 + ) + + def _make_layer(self, block, planes, blocks, stride=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes)) + + return nn.Sequential(*layers) + + def _get_deconv_cfg(self, deconv_kernel, index): + if deconv_kernel == 4: + padding = 1 + output_padding = 0 + elif deconv_kernel == 3: + padding = 1 + output_padding = 1 + elif deconv_kernel == 2: + padding = 0 + output_padding = 0 + + return deconv_kernel, padding, output_padding + + def _make_deconv_layer(self, num_layers, num_filters, num_kernels): + assert num_layers == len(num_filters), \ + 'ERROR: num_deconv_layers is different len(num_deconv_filters)' + assert num_layers == len(num_kernels), \ + 'ERROR: num_deconv_layers is different len(num_deconv_filters)' + + layers = [] + for i in range(num_layers): + kernel, padding, output_padding = \ + self._get_deconv_cfg(num_kernels[i], i) + + planes = num_filters[i] + layers.append( + nn.ConvTranspose2d( + in_channels=self.inplanes, + out_channels=planes, + kernel_size=kernel, + stride=2, + padding=padding, + output_padding=output_padding, + bias=self.deconv_with_bias)) + layers.append(nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)) + layers.append(nn.ReLU(inplace=True)) + self.inplanes = planes + + return nn.Sequential(*layers) + + def forward(self, x): + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.maxpool(x) + + x = self.layer1(x) + x = self.layer2(x) + x = self.layer3(x) + x = self.layer4(x) + + x = self.deconv_layers(x) + x = self.final_layer(x) + + return x + + def init_weights(self, pretrained=''): + if os.path.isfile(pretrained): + logger.info('=> init deconv weights from normal distribution') + for name, m in self.deconv_layers.named_modules(): + if isinstance(m, nn.ConvTranspose2d): + logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) + logger.info('=> init {}.bias as 0'.format(name)) + nn.init.normal_(m.weight, std=0.001) + if self.deconv_with_bias: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): + logger.info('=> init {}.weight as 1'.format(name)) + logger.info('=> init {}.bias as 0'.format(name)) + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + logger.info('=> init final conv weights from normal distribution') + for m in self.final_layer.modules(): + if isinstance(m, nn.Conv2d): + # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + logger.info('=> init {}.weight as normal(0, 0.001)'.format(name)) + logger.info('=> init {}.bias as 0'.format(name)) + nn.init.normal_(m.weight, std=0.001) + nn.init.constant_(m.bias, 0) + + pretrained_state_dict = torch.load(pretrained) + logger.info('=> loading pretrained model {}'.format(pretrained)) + self.load_state_dict(pretrained_state_dict, strict=False) + else: + logger.info('=> init weights from normal distribution') + for m in self.modules(): + if isinstance(m, nn.Conv2d): + # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') + nn.init.normal_(m.weight, std=0.001) + # 
nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.BatchNorm2d): + nn.init.constant_(m.weight, 1) + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.ConvTranspose2d): + nn.init.normal_(m.weight, std=0.001) + if self.deconv_with_bias: + nn.init.constant_(m.bias, 0) + + +resnet_spec = { + 18: (BasicBlock, [2, 2, 2, 2]), + 34: (BasicBlock, [3, 4, 6, 3]), + 50: (Bottleneck, [3, 4, 6, 3]), + 101: (Bottleneck, [3, 4, 23, 3]), + 152: (Bottleneck, [3, 8, 36, 3]) +} + + +def get_pose_net(cfg, is_train, **kwargs): + num_layers = cfg.MODEL.EXTRA.NUM_LAYERS + + block_class, layers = resnet_spec[num_layers] + + model = PoseResNet(block_class, layers, cfg, **kwargs) + + if is_train and cfg.MODEL.INIT_WEIGHTS: + model.init_weights(cfg.MODEL.PRETRAINED) + + return model diff --git a/lib/nms/__init__.py b/lib/nms/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lib/nms/cpu_nms.pyx b/lib/nms/cpu_nms.pyx new file mode 100644 index 0000000..3cd0d74 --- /dev/null +++ b/lib/nms/cpu_nms.pyx @@ -0,0 +1,71 @@ +# ------------------------------------------------------------------------------ +# Copyright (c) Microsoft +# Licensed under the MIT License. +# Written by Bin Xiao (Bin.Xiao@microsoft.com) +# ------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +cimport numpy as np + +cdef inline np.float32_t max(np.float32_t a, np.float32_t b): + return a if a >= b else b + +cdef inline np.float32_t min(np.float32_t a, np.float32_t b): + return a if a <= b else b + +def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): + cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] + cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] + cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] + cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] + cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] + + cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) + cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1].astype('i') + + cdef int ndets = dets.shape[0] + cdef np.ndarray[np.int_t, ndim=1] suppressed = \ + np.zeros((ndets), dtype=np.int) + + # nominal indices + cdef int _i, _j + # sorted indices + cdef int i, j + # temp variables for box i's (the box currently under consideration) + cdef np.float32_t ix1, iy1, ix2, iy2, iarea + # variables for computing overlap with box j (lower scoring box) + cdef np.float32_t xx1, yy1, xx2, yy2 + cdef np.float32_t w, h + cdef np.float32_t inter, ovr + + keep = [] + for _i in range(ndets): + i = order[_i] + if suppressed[i] == 1: + continue + keep.append(i) + ix1 = x1[i] + iy1 = y1[i] + ix2 = x2[i] + iy2 = y2[i] + iarea = areas[i] + for _j in range(_i + 1, ndets): + j = order[_j] + if suppressed[j] == 1: + continue + xx1 = max(ix1, x1[j]) + yy1 = max(iy1, y1[j]) + xx2 = min(ix2, x2[j]) + yy2 = min(iy2, y2[j]) + w = max(0.0, xx2 - xx1 + 1) + h = max(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (iarea + areas[j] - inter) + if ovr >= thresh: + suppressed[j] = 1 + + return keep diff --git a/lib/nms/gpu_nms.cu b/lib/nms/gpu_nms.cu new file mode 100644 index 0000000..2f6931e --- /dev/null +++ b/lib/nms/gpu_nms.cu @@ -0,0 +1,7080 @@ +// ------------------------------------------------------------------ +// Copyright (c) Microsoft +// Licensed under The MIT License +// Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn) +// 
diff --git a/lib/nms/gpu_nms.cu b/lib/nms/gpu_nms.cu
new file mode 100644
index 0000000..2f6931e
--- /dev/null
+++ b/lib/nms/gpu_nms.cu
@@ -0,0 +1,7080 @@
+// ------------------------------------------------------------------
+// Copyright (c) Microsoft
+// Licensed under The MIT License
+// Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
+// ------------------------------------------------------------------
+
+//#include "gpu_nms.hpp"
+#include <vector>
+#include <iostream>
+
+
+#define CUDA_CHECK(condition) \
+  /* Code block avoids redefinition of cudaError_t error */ \
+  do { \
+    cudaError_t error = condition; \
+    if (error != cudaSuccess) { \
+      std::cout << cudaGetErrorString(error) << std::endl; \
+    } \
+  } while (0)
+
+#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
+int const threadsPerBlock = sizeof(unsigned long long) * 8;
+
+__device__ inline float devIoU(float const * const a, float const * const b) {
+  float left = max(a[0], b[0]), right = min(a[2], b[2]);
+  float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
+  float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
+  float interS = width * height;
+  float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
+  float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
+  return interS / (Sa + Sb - interS);
+}
+
+__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
+                           const float *dev_boxes, unsigned long long *dev_mask) {
+  const int row_start = blockIdx.y;
+  const int col_start = blockIdx.x;
+
+  // if (row_start > col_start) return;
+
+  const int row_size =
+        min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
+  const int col_size =
+        min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
+
+  __shared__ float block_boxes[threadsPerBlock * 5];
+  if (threadIdx.x < col_size) {
+    block_boxes[threadIdx.x * 5 + 0] =
+        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
+    block_boxes[threadIdx.x * 5 + 1] =
+        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
+    block_boxes[threadIdx.x * 5 + 2] =
+        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
+    block_boxes[threadIdx.x * 5 + 3] =
+        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
+    block_boxes[threadIdx.x * 5 + 4] =
+        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
+  }
+  __syncthreads();
+
+  if (threadIdx.x < row_size) {
+    const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
+    const float *cur_box = dev_boxes + cur_box_idx * 5;
+    int i = 0;
+    unsigned long long t = 0;
+    int start = 0;
+    if (row_start == col_start) {
+      start = threadIdx.x + 1;
+    }
+    for (i = start; i < col_size; i++) {
+      if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
+        t |= 1ULL << i;
+      }
+    }
+    const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
+    dev_mask[cur_box_idx * col_blocks + col_start] = t;
+  }
+}
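+
+// One thread block covers a 64x64 tile of the box-pair matrix: each thread
+// loads its "row" box, compares it against up to 64 "column" boxes staged in
+// shared memory, and packs the IoU > threshold results into a single
+// unsigned long long bitmask. The host-side sweep in _nms below then walks
+// boxes in descending score order, OR-ing these masks together to drop every
+// box already covered by a kept one.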
+
+void _set_device(int device_id) {
+  int current_device;
+  CUDA_CHECK(cudaGetDevice(&current_device));
+  if (current_device == device_id) {
+    return;
+  }
+  // The call to cudaSetDevice must come before any calls to Get, which
+  // may perform initialization using the GPU.
+  CUDA_CHECK(cudaSetDevice(device_id));
+}
+
+void _nms(long* keep_out, int* num_out, const float* boxes_host, int boxes_num,
+          int boxes_dim, float nms_overlap_thresh, int device_id) {
+  _set_device(device_id);
+
+  float* boxes_dev = NULL;
+  unsigned long long* mask_dev = NULL;
+
+  const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
+
+  CUDA_CHECK(cudaMalloc(&boxes_dev,
+                        boxes_num * boxes_dim * sizeof(float)));
+  CUDA_CHECK(cudaMemcpy(boxes_dev,
+                        boxes_host,
+                        boxes_num * boxes_dim * sizeof(float),
+                        cudaMemcpyHostToDevice));
+
+  CUDA_CHECK(cudaMalloc(&mask_dev,
+                        boxes_num * col_blocks * sizeof(unsigned long long)));
+
+  dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
+              DIVUP(boxes_num, threadsPerBlock));
+  dim3 threads(threadsPerBlock);
+  nms_kernel<<<blocks, threads>>>(boxes_num,
+                                  nms_overlap_thresh,
+                                  boxes_dev,
+                                  mask_dev);
+
+  std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
+  CUDA_CHECK(cudaMemcpy(&mask_host[0],
+                        mask_dev,
+                        sizeof(unsigned long long) * boxes_num * col_blocks,
+                        cudaMemcpyDeviceToHost));
+
+  std::vector<unsigned long long> remv(col_blocks);
+  memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
+
+  int num_to_keep = 0;
+  for (int i = 0; i < boxes_num; i++) {
+    int nblock = i / threadsPerBlock;
+    int inblock = i % threadsPerBlock;
+
+    if (!(remv[nblock] & (1ULL << inblock))) {
+      keep_out[num_to_keep++] = i;
+      unsigned long long *p = &mask_host[0] + i * col_blocks;
+      for (int j = nblock; j < col_blocks; j++) {
+        remv[j] |= p[j];
+      }
+    }
+  }
+  *num_out = num_to_keep;
+
+  CUDA_CHECK(cudaFree(boxes_dev));
+  CUDA_CHECK(cudaFree(mask_dev));
+}
+
+
+
+
+
+
+
+
+
+
+/* Generated by Cython 0.24 */
+
+#define PY_SSIZE_T_CLEAN
+#include "Python.h"
+#ifndef Py_PYTHON_H
+    #error Python headers needed to compile C extensions, please install development version of Python.
+#elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03020000)
+    #error Cython requires Python 2.6+ or Python 3.2+.
+#else
+#define CYTHON_ABI "0_24"
+#include <stddef.h>
+#ifndef offsetof
+  #define offsetof(type, member) ( (size_t) & ((type*)0) -> member )
+#endif
+#if !defined(WIN32) && !defined(MS_WINDOWS)
+  #ifndef __stdcall
+    #define __stdcall
+  #endif
+  #ifndef __cdecl
+    #define __cdecl
+  #endif
+  #ifndef __fastcall
+    #define __fastcall
+  #endif
+#endif
+#ifndef DL_IMPORT
+  #define DL_IMPORT(t) t
+#endif
+#ifndef DL_EXPORT
+  #define DL_EXPORT(t) t
+#endif
+#ifndef PY_LONG_LONG
+  #define PY_LONG_LONG LONG_LONG
+#endif
+#ifndef Py_HUGE_VAL
+  #define Py_HUGE_VAL HUGE_VAL
+#endif
+#ifdef PYPY_VERSION
+  #define CYTHON_COMPILING_IN_PYPY 1
+  #define CYTHON_COMPILING_IN_CPYTHON 0
+#else
+  #define CYTHON_COMPILING_IN_PYPY 0
+  #define CYTHON_COMPILING_IN_CPYTHON 1
+#endif
+#if !defined(CYTHON_USE_PYLONG_INTERNALS) && CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x02070000
+  #define CYTHON_USE_PYLONG_INTERNALS 1
+#endif
+#if CYTHON_USE_PYLONG_INTERNALS
+  #include "longintrepr.h"
+  #undef SHIFT
+  #undef BASE
+  #undef MASK
+#endif
+#if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x02070600 && !defined(Py_OptimizeFlag)
+  #define Py_OptimizeFlag 0
+#endif
+#define __PYX_BUILD_PY_SSIZE_T "n"
+#define CYTHON_FORMAT_SSIZE_T "z"
+#if PY_MAJOR_VERSION < 3
+  #define __Pyx_BUILTIN_MODULE_NAME "__builtin__"
+  #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\
+          PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
+  #define __Pyx_DefaultClassType PyClass_Type
+#else
+  #define __Pyx_BUILTIN_MODULE_NAME "builtins"
+  #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\
+          PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
+  #define __Pyx_DefaultClassType PyType_Type
+#endif
+#ifndef Py_TPFLAGS_CHECKTYPES
+  #define Py_TPFLAGS_CHECKTYPES 0
+#endif
+#ifndef Py_TPFLAGS_HAVE_INDEX
+  #define Py_TPFLAGS_HAVE_INDEX 0
+#endif
+#ifndef Py_TPFLAGS_HAVE_NEWBUFFER
+  #define Py_TPFLAGS_HAVE_NEWBUFFER 0
+#endif
+#ifndef Py_TPFLAGS_HAVE_FINALIZE
+  #define Py_TPFLAGS_HAVE_FINALIZE 0
+#endif
+#if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND)
+  #define CYTHON_PEP393_ENABLED 1
+  #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\
+                                        0 : _PyUnicode_Ready((PyObject *)(op)))
+  #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u)
+  #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i)
+  #define __Pyx_PyUnicode_KIND(u) PyUnicode_KIND(u)
+  #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u)
+  #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i)
+  #define __Pyx_PyUnicode_IS_TRUE(u) (0 != (likely(PyUnicode_IS_READY(u)) ?
PyUnicode_GET_LENGTH(u) : PyUnicode_GET_SIZE(u))) +#else + #define CYTHON_PEP393_ENABLED 0 + #define __Pyx_PyUnicode_READY(op) (0) + #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u) + #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i])) + #define __Pyx_PyUnicode_KIND(u) (sizeof(Py_UNICODE)) + #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u)) + #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i])) + #define __Pyx_PyUnicode_IS_TRUE(u) (0 != PyUnicode_GET_SIZE(u)) +#endif +#if CYTHON_COMPILING_IN_PYPY + #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b) +#else + #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b) + #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\ + PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b)) +#endif +#if CYTHON_COMPILING_IN_PYPY && !defined(PyUnicode_Contains) + #define PyUnicode_Contains(u, s) PySequence_Contains(u, s) +#endif +#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Format) + #define PyObject_Format(obj, fmt) PyObject_CallMethod(obj, "__format__", "O", fmt) +#endif +#if CYTHON_COMPILING_IN_PYPY && !defined(PyObject_Malloc) + #define PyObject_Malloc(s) PyMem_Malloc(s) + #define PyObject_Free(p) PyMem_Free(p) + #define PyObject_Realloc(p) PyMem_Realloc(p) +#endif +#define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b)) +#define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b)) +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b) +#else + #define __Pyx_PyString_Format(a, b) PyString_Format(a, b) +#endif +#if PY_MAJOR_VERSION < 3 && !defined(PyObject_ASCII) + #define PyObject_ASCII(o) PyObject_Repr(o) +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyBaseString_Type PyUnicode_Type + #define PyStringObject PyUnicodeObject + #define PyString_Type PyUnicode_Type + #define PyString_Check PyUnicode_Check + #define PyString_CheckExact PyUnicode_CheckExact +#endif +#if PY_MAJOR_VERSION >= 3 + #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj) + #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj) +#else + #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj)) + #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj)) +#endif +#ifndef PySet_CheckExact + #define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type) +#endif +#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type) +#if PY_MAJOR_VERSION >= 3 + #define PyIntObject PyLongObject + #define PyInt_Type PyLong_Type + #define PyInt_Check(op) PyLong_Check(op) + #define PyInt_CheckExact(op) PyLong_CheckExact(op) + #define PyInt_FromString PyLong_FromString + #define PyInt_FromUnicode PyLong_FromUnicode + #define PyInt_FromLong PyLong_FromLong + #define PyInt_FromSize_t PyLong_FromSize_t + #define PyInt_FromSsize_t PyLong_FromSsize_t + #define PyInt_AsLong PyLong_AsLong + #define PyInt_AS_LONG PyLong_AS_LONG + #define PyInt_AsSsize_t PyLong_AsSsize_t + #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask + #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask + #define PyNumber_Int PyNumber_Long +#endif +#if PY_MAJOR_VERSION >= 3 + #define PyBoolObject PyLongObject +#endif +#if PY_MAJOR_VERSION >= 3 && 
CYTHON_COMPILING_IN_PYPY
+  #ifndef PyUnicode_InternFromString
+    #define PyUnicode_InternFromString(s) PyUnicode_FromString(s)
+  #endif
+#endif
+#if PY_VERSION_HEX < 0x030200A4
+  typedef long Py_hash_t;
+  #define __Pyx_PyInt_FromHash_t PyInt_FromLong
+  #define __Pyx_PyInt_AsHash_t PyInt_AsLong
+#else
+  #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t
+  #define __Pyx_PyInt_AsHash_t PyInt_AsSsize_t
+#endif
+#if PY_MAJOR_VERSION >= 3
+  #define __Pyx_PyMethod_New(func, self, klass) ((self) ? PyMethod_New(func, self) : PyInstanceMethod_New(func))
+#else
+  #define __Pyx_PyMethod_New(func, self, klass) PyMethod_New(func, self, klass)
+#endif
+#if PY_VERSION_HEX >= 0x030500B1
+#define __Pyx_PyAsyncMethodsStruct PyAsyncMethods
+#define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async)
+#elif CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
+typedef struct {
+    unaryfunc am_await;
+    unaryfunc am_aiter;
+    unaryfunc am_anext;
+} __Pyx_PyAsyncMethodsStruct;
+#define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved))
+#else
+#define __Pyx_PyType_AsAsync(obj) NULL
+#endif
+#ifndef CYTHON_RESTRICT
+  #if defined(__GNUC__)
+    #define CYTHON_RESTRICT __restrict__
+  #elif defined(_MSC_VER) && _MSC_VER >= 1400
+    #define CYTHON_RESTRICT __restrict
+  #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+    #define CYTHON_RESTRICT restrict
+  #else
+    #define CYTHON_RESTRICT
+  #endif
+#endif
+#define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None)
+
+#ifndef __cplusplus
+  #error "Cython files generated with the C++ option must be compiled with a C++ compiler."
+#endif
+#ifndef CYTHON_INLINE
+  #define CYTHON_INLINE inline
+#endif
+template<typename T>
+void __Pyx_call_destructor(T& x) {
+    x.~T();
+}
+template<typename T>
+class __Pyx_FakeReference {
+  public:
+    __Pyx_FakeReference() : ptr(NULL) { }
+    __Pyx_FakeReference(const T& ref) : ptr(const_cast<T*>(&ref)) { }
+    T *operator->() { return ptr; }
+    operator T&() { return *ptr; }
+  private:
+    T *ptr;
+};
+
+#if defined(WIN32) || defined(MS_WINDOWS)
+  #define _USE_MATH_DEFINES
+#endif
+#include <math.h>
+#ifdef NAN
+#define __PYX_NAN() ((float) NAN)
+#else
+static CYTHON_INLINE float __PYX_NAN() {
+  float value;
+  memset(&value, 0xFF, sizeof(value));
+  return value;
+}
+#endif
+
+
+#define __PYX_ERR(f_index, lineno, Ln_error) \
+{ \
+  __pyx_filename = __pyx_f[f_index]; __pyx_lineno = lineno; __pyx_clineno = __LINE__; goto Ln_error; \
+}
+
+#if PY_MAJOR_VERSION >= 3
+  #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y)
+  #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y)
+#else
+  #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y)
+  #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y)
+#endif
+
+#ifndef __PYX_EXTERN_C
+  #ifdef __cplusplus
+    #define __PYX_EXTERN_C extern "C"
+  #else
+    #define __PYX_EXTERN_C extern
+  #endif
+#endif
+
+#define __PYX_HAVE__nms__gpu_nms
+#define __PYX_HAVE_API__nms__gpu_nms
+#include "string.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "numpy/arrayobject.h"
+#include "numpy/ufuncobject.h"
+#include "gpu_nms.hpp"
+#ifdef _OPENMP
+#include <omp.h>
+#endif /* _OPENMP */
+
+#ifdef PYREX_WITHOUT_ASSERTIONS
+#define CYTHON_WITHOUT_ASSERTIONS
+#endif
+
+#ifndef CYTHON_UNUSED
+# if defined(__GNUC__)
+#   if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+#     define CYTHON_UNUSED __attribute__ ((__unused__))
+#   else
+#     define CYTHON_UNUSED
+#   endif
+# elif defined(__ICC) || (defined(__INTEL_COMPILER) && 
!defined(_MSC_VER))
+#   define CYTHON_UNUSED __attribute__ ((__unused__))
+# else
+#   define CYTHON_UNUSED
+# endif
+#endif
+#ifndef CYTHON_NCP_UNUSED
+# if CYTHON_COMPILING_IN_CPYTHON
+#  define CYTHON_NCP_UNUSED
+# else
+#  define CYTHON_NCP_UNUSED CYTHON_UNUSED
+# endif
+#endif
+typedef struct {PyObject **p; const char *s; const Py_ssize_t n; const char* encoding;
+                const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry;
+
+#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0
+#define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 0
+#define __PYX_DEFAULT_STRING_ENCODING ""
+#define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString
+#define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
+#define __Pyx_uchar_cast(c) ((unsigned char)c)
+#define __Pyx_long_cast(x) ((long)x)
+#define __Pyx_fits_Py_ssize_t(v, type, is_signed) (\
+    (sizeof(type) < sizeof(Py_ssize_t)) ||\
+    (sizeof(type) > sizeof(Py_ssize_t) &&\
+          likely(v < (type)PY_SSIZE_T_MAX ||\
+                 v == (type)PY_SSIZE_T_MAX) &&\
+          (!is_signed || likely(v > (type)PY_SSIZE_T_MIN ||\
+                                v == (type)PY_SSIZE_T_MIN))) ||\
+    (sizeof(type) == sizeof(Py_ssize_t) &&\
+          (is_signed || likely(v < (type)PY_SSIZE_T_MAX ||\
+                               v == (type)PY_SSIZE_T_MAX))) )
+#if defined (__cplusplus) && __cplusplus >= 201103L
+    #include <cstdlib>
+    #define __Pyx_sst_abs(value) std::abs(value)
+#elif SIZEOF_INT >= SIZEOF_SIZE_T
+    #define __Pyx_sst_abs(value) abs(value)
+#elif SIZEOF_LONG >= SIZEOF_SIZE_T
+    #define __Pyx_sst_abs(value) labs(value)
+#elif defined (_MSC_VER) && defined (_M_X64)
+    #define __Pyx_sst_abs(value) _abs64(value)
+#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+    #define __Pyx_sst_abs(value) llabs(value)
+#elif defined (__GNUC__)
+    #define __Pyx_sst_abs(value) __builtin_llabs(value)
+#else
+    #define __Pyx_sst_abs(value) ((value<0) ? 
-value : value) +#endif +static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject*); +static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length); +#define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s)) +#define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l) +#define __Pyx_PyBytes_FromString PyBytes_FromString +#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*); +#if PY_MAJOR_VERSION < 3 + #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString + #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize +#else + #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString + #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize +#endif +#define __Pyx_PyObject_AsSString(s) ((signed char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_AsUString(s) ((unsigned char*) __Pyx_PyObject_AsString(s)) +#define __Pyx_PyObject_FromCString(s) __Pyx_PyObject_FromString((const char*)s) +#define __Pyx_PyBytes_FromCString(s) __Pyx_PyBytes_FromString((const char*)s) +#define __Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s) +#define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s) +#define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s) +#if PY_MAJOR_VERSION < 3 +static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u) +{ + const Py_UNICODE *u_end = u; + while (*u_end++) ; + return (size_t)(u_end - u - 1); +} +#else +#define __Pyx_Py_UNICODE_strlen Py_UNICODE_strlen +#endif +#define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u)) +#define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode +#define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode +#define __Pyx_NewRef(obj) (Py_INCREF(obj), obj) +#define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None) +#define __Pyx_PyBool_FromLong(b) ((b) ? __Pyx_NewRef(Py_True) : __Pyx_NewRef(Py_False)) +static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); +static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x); +static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*); +static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t); +#if CYTHON_COMPILING_IN_CPYTHON +#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x)) +#else +#define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x) +#endif +#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x)) +#if PY_MAJOR_VERSION >= 3 +#define __Pyx_PyNumber_Int(x) (PyLong_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Long(x)) +#else +#define __Pyx_PyNumber_Int(x) (PyInt_CheckExact(x) ? __Pyx_NewRef(x) : PyNumber_Int(x)) +#endif +#define __Pyx_PyNumber_Float(x) (PyFloat_CheckExact(x) ? 
__Pyx_NewRef(x) : PyNumber_Float(x)) +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII +static int __Pyx_sys_getdefaultencoding_not_ascii; +static int __Pyx_init_sys_getdefaultencoding_params(void) { + PyObject* sys; + PyObject* default_encoding = NULL; + PyObject* ascii_chars_u = NULL; + PyObject* ascii_chars_b = NULL; + const char* default_encoding_c; + sys = PyImport_ImportModule("sys"); + if (!sys) goto bad; + default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL); + Py_DECREF(sys); + if (!default_encoding) goto bad; + default_encoding_c = PyBytes_AsString(default_encoding); + if (!default_encoding_c) goto bad; + if (strcmp(default_encoding_c, "ascii") == 0) { + __Pyx_sys_getdefaultencoding_not_ascii = 0; + } else { + char ascii_chars[128]; + int c; + for (c = 0; c < 128; c++) { + ascii_chars[c] = c; + } + __Pyx_sys_getdefaultencoding_not_ascii = 1; + ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL); + if (!ascii_chars_u) goto bad; + ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL); + if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) { + PyErr_Format( + PyExc_ValueError, + "This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.", + default_encoding_c); + goto bad; + } + Py_DECREF(ascii_chars_u); + Py_DECREF(ascii_chars_b); + } + Py_DECREF(default_encoding); + return 0; +bad: + Py_XDECREF(default_encoding); + Py_XDECREF(ascii_chars_u); + Py_XDECREF(ascii_chars_b); + return -1; +} +#endif +#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3 +#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL) +#else +#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL) +#if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT +static char* __PYX_DEFAULT_STRING_ENCODING; +static int __Pyx_init_sys_getdefaultencoding_params(void) { + PyObject* sys; + PyObject* default_encoding = NULL; + char* default_encoding_c; + sys = PyImport_ImportModule("sys"); + if (!sys) goto bad; + default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL); + Py_DECREF(sys); + if (!default_encoding) goto bad; + default_encoding_c = PyBytes_AsString(default_encoding); + if (!default_encoding_c) goto bad; + __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c)); + if (!__PYX_DEFAULT_STRING_ENCODING) goto bad; + strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c); + Py_DECREF(default_encoding); + return 0; +bad: + Py_XDECREF(default_encoding); + return -1; +} +#endif +#endif + + +/* Test for GCC > 2.95 */ +#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))) + #define likely(x) __builtin_expect(!!(x), 1) + #define unlikely(x) __builtin_expect(!!(x), 0) +#else /* !__GNUC__ or GCC < 2.95 */ + #define likely(x) (x) + #define unlikely(x) (x) +#endif /* __GNUC__ */ + +static PyObject *__pyx_m; +static PyObject *__pyx_d; +static PyObject *__pyx_b; +static PyObject *__pyx_empty_tuple; +static PyObject *__pyx_empty_bytes; +static PyObject *__pyx_empty_unicode; +static int __pyx_lineno; +static int __pyx_clineno = 0; +static const char * __pyx_cfilenm= __FILE__; +static const char *__pyx_filename; + +/* None.proto */ +#if !defined(CYTHON_CCOMPLEX) + #if defined(__cplusplus) + #define CYTHON_CCOMPLEX 1 + #elif 
defined(_Complex_I)
+    #define CYTHON_CCOMPLEX 1
+  #else
+    #define CYTHON_CCOMPLEX 0
+  #endif
+#endif
+#if CYTHON_CCOMPLEX
+  #ifdef __cplusplus
+    #include <complex>
+  #else
+    #include <complex.h>
+  #endif
+#endif
+#if CYTHON_CCOMPLEX && !defined(__cplusplus) && defined(__sun__) && defined(__GNUC__)
+  #undef _Complex_I
+  #define _Complex_I 1.0fj
+#endif
+
+
+static const char *__pyx_f[] = {
+  "nms\\gpu_nms.pyx",
+  "__init__.pxd",
+  "type.pxd",
+};
+/* BufferFormatStructs.proto */
+#define IS_UNSIGNED(type) (((type) -1) > 0)
+struct __Pyx_StructField_;
+#define __PYX_BUF_FLAGS_PACKED_STRUCT (1 << 0)
+typedef struct {
+  const char* name;
+  struct __Pyx_StructField_* fields;
+  size_t size;
+  size_t arraysize[8];
+  int ndim;
+  char typegroup;
+  char is_unsigned;
+  int flags;
+} __Pyx_TypeInfo;
+typedef struct __Pyx_StructField_ {
+  __Pyx_TypeInfo* type;
+  const char* name;
+  size_t offset;
+} __Pyx_StructField;
+typedef struct {
+  __Pyx_StructField* field;
+  size_t parent_offset;
+} __Pyx_BufFmt_StackElem;
+typedef struct {
+  __Pyx_StructField root;
+  __Pyx_BufFmt_StackElem* head;
+  size_t fmt_offset;
+  size_t new_count, enc_count;
+  size_t struct_alignment;
+  int is_complex;
+  char enc_type;
+  char new_packmode;
+  char enc_packmode;
+  char is_valid_array;
+} __Pyx_BufFmt_Context;
+
+
+/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":725
+ * # in Cython to enable them only on the right systems.
+ *
+ * ctypedef npy_int8 int8_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_int16 int16_t
+ * ctypedef npy_int32 int32_t
+ */
+typedef npy_int8 __pyx_t_5numpy_int8_t;
+
+/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":726
+ *
+ * ctypedef npy_int8 int8_t
+ * ctypedef npy_int16 int16_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_int32 int32_t
+ * ctypedef npy_int64 int64_t
+ */
+typedef npy_int16 __pyx_t_5numpy_int16_t;
+
+/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":727
+ * ctypedef npy_int8 int8_t
+ * ctypedef npy_int16 int16_t
+ * ctypedef npy_int32 int32_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_int64 int64_t
+ * #ctypedef npy_int96 int96_t
+ */
+typedef npy_int32 __pyx_t_5numpy_int32_t;
+
+/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":728
+ * ctypedef npy_int16 int16_t
+ * ctypedef npy_int32 int32_t
+ * ctypedef npy_int64 int64_t             # <<<<<<<<<<<<<<
+ * #ctypedef npy_int96 int96_t
+ * #ctypedef npy_int128 int128_t
+ */
+typedef npy_int64 __pyx_t_5numpy_int64_t;
+
+/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":732
+ * #ctypedef npy_int128 int128_t
+ *
+ * ctypedef npy_uint8 uint8_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_uint16 uint16_t
+ * ctypedef npy_uint32 uint32_t
+ */
+typedef npy_uint8 __pyx_t_5numpy_uint8_t;
+
+/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":733
+ *
+ * ctypedef npy_uint8 uint8_t
+ * ctypedef npy_uint16 uint16_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_uint32 uint32_t
+ * ctypedef npy_uint64 uint64_t
+ */
+typedef npy_uint16 __pyx_t_5numpy_uint16_t;
+
+/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":734
+ * ctypedef npy_uint8 uint8_t
+ * ctypedef npy_uint16 uint16_t
+ * ctypedef npy_uint32 uint32_t             # <<<<<<<<<<<<<<
+ * ctypedef npy_uint64 uint64_t
+ * #ctypedef npy_uint96 uint96_t
+ */
+typedef npy_uint32 __pyx_t_5numpy_uint32_t;
+
+/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":735
+ * ctypedef npy_uint16 uint16_t
+ * ctypedef npy_uint32 uint32_t
+ * ctypedef npy_uint64 uint64_t             # <<<<<<<<<<<<<<
+ * #ctypedef npy_uint96 uint96_t
+ * #ctypedef 
npy_uint128 uint128_t + */ +typedef npy_uint64 __pyx_t_5numpy_uint64_t; + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":739 + * #ctypedef npy_uint128 uint128_t + * + * ctypedef npy_float32 float32_t # <<<<<<<<<<<<<< + * ctypedef npy_float64 float64_t + * #ctypedef npy_float80 float80_t + */ +typedef npy_float32 __pyx_t_5numpy_float32_t; + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":740 + * + * ctypedef npy_float32 float32_t + * ctypedef npy_float64 float64_t # <<<<<<<<<<<<<< + * #ctypedef npy_float80 float80_t + * #ctypedef npy_float128 float128_t + */ +typedef npy_float64 __pyx_t_5numpy_float64_t; + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":749 + * # The int types are mapped a bit surprising -- + * # numpy.int corresponds to 'l' and numpy.long to 'q' + * ctypedef npy_long int_t # <<<<<<<<<<<<<< + * ctypedef npy_longlong long_t + * ctypedef npy_longlong longlong_t + */ +typedef npy_long __pyx_t_5numpy_int_t; + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":750 + * # numpy.int corresponds to 'l' and numpy.long to 'q' + * ctypedef npy_long int_t + * ctypedef npy_longlong long_t # <<<<<<<<<<<<<< + * ctypedef npy_longlong longlong_t + * + */ +typedef npy_longlong __pyx_t_5numpy_long_t; + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":751 + * ctypedef npy_long int_t + * ctypedef npy_longlong long_t + * ctypedef npy_longlong longlong_t # <<<<<<<<<<<<<< + * + * ctypedef npy_ulong uint_t + */ +typedef npy_longlong __pyx_t_5numpy_longlong_t; + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":753 + * ctypedef npy_longlong longlong_t + * + * ctypedef npy_ulong uint_t # <<<<<<<<<<<<<< + * ctypedef npy_ulonglong ulong_t + * ctypedef npy_ulonglong ulonglong_t + */ +typedef npy_ulong __pyx_t_5numpy_uint_t; + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":754 + * + * ctypedef npy_ulong uint_t + * ctypedef npy_ulonglong ulong_t # <<<<<<<<<<<<<< + * ctypedef npy_ulonglong ulonglong_t + * + */ +typedef npy_ulonglong __pyx_t_5numpy_ulong_t; + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":755 + * ctypedef npy_ulong uint_t + * ctypedef npy_ulonglong ulong_t + * ctypedef npy_ulonglong ulonglong_t # <<<<<<<<<<<<<< + * + * ctypedef npy_intp intp_t + */ +typedef npy_ulonglong __pyx_t_5numpy_ulonglong_t; + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":757 + * ctypedef npy_ulonglong ulonglong_t + * + * ctypedef npy_intp intp_t # <<<<<<<<<<<<<< + * ctypedef npy_uintp uintp_t + * + */ +typedef npy_intp __pyx_t_5numpy_intp_t; + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":758 + * + * ctypedef npy_intp intp_t + * ctypedef npy_uintp uintp_t # <<<<<<<<<<<<<< + * + * ctypedef npy_double float_t + */ +typedef npy_uintp __pyx_t_5numpy_uintp_t; + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":760 + * ctypedef npy_uintp uintp_t + * + * ctypedef npy_double float_t # <<<<<<<<<<<<<< + * ctypedef npy_double double_t + * ctypedef npy_longdouble longdouble_t + */ +typedef npy_double __pyx_t_5numpy_float_t; + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":761 + * + * ctypedef npy_double float_t + * ctypedef npy_double double_t # <<<<<<<<<<<<<< + * ctypedef npy_longdouble longdouble_t + * + */ +typedef npy_double __pyx_t_5numpy_double_t; + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":762 + * ctypedef 
npy_double float_t + * ctypedef npy_double double_t + * ctypedef npy_longdouble longdouble_t # <<<<<<<<<<<<<< + * + * ctypedef npy_cfloat cfloat_t + */ +typedef npy_longdouble __pyx_t_5numpy_longdouble_t; +/* None.proto */ +#if CYTHON_CCOMPLEX + #ifdef __cplusplus + typedef ::std::complex< float > __pyx_t_float_complex; + #else + typedef float _Complex __pyx_t_float_complex; + #endif +#else + typedef struct { float real, imag; } __pyx_t_float_complex; +#endif + +/* None.proto */ +#if CYTHON_CCOMPLEX + #ifdef __cplusplus + typedef ::std::complex< double > __pyx_t_double_complex; + #else + typedef double _Complex __pyx_t_double_complex; + #endif +#else + typedef struct { double real, imag; } __pyx_t_double_complex; +#endif + + +/*--- Type declarations ---*/ + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":764 + * ctypedef npy_longdouble longdouble_t + * + * ctypedef npy_cfloat cfloat_t # <<<<<<<<<<<<<< + * ctypedef npy_cdouble cdouble_t + * ctypedef npy_clongdouble clongdouble_t + */ +typedef npy_cfloat __pyx_t_5numpy_cfloat_t; + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":765 + * + * ctypedef npy_cfloat cfloat_t + * ctypedef npy_cdouble cdouble_t # <<<<<<<<<<<<<< + * ctypedef npy_clongdouble clongdouble_t + * + */ +typedef npy_cdouble __pyx_t_5numpy_cdouble_t; + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":766 + * ctypedef npy_cfloat cfloat_t + * ctypedef npy_cdouble cdouble_t + * ctypedef npy_clongdouble clongdouble_t # <<<<<<<<<<<<<< + * + * ctypedef npy_cdouble complex_t + */ +typedef npy_clongdouble __pyx_t_5numpy_clongdouble_t; + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":768 + * ctypedef npy_clongdouble clongdouble_t + * + * ctypedef npy_cdouble complex_t # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew1(a): + */ +typedef npy_cdouble __pyx_t_5numpy_complex_t; + +/* --- Runtime support code (head) --- */ +/* Refnanny.proto */ +#ifndef CYTHON_REFNANNY + #define CYTHON_REFNANNY 0 +#endif +#if CYTHON_REFNANNY + typedef struct { + void (*INCREF)(void*, PyObject*, int); + void (*DECREF)(void*, PyObject*, int); + void (*GOTREF)(void*, PyObject*, int); + void (*GIVEREF)(void*, PyObject*, int); + void* (*SetupContext)(const char*, int, const char*); + void (*FinishContext)(void**); + } __Pyx_RefNannyAPIStruct; + static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL; + static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); + #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL; +#ifdef WITH_THREAD + #define __Pyx_RefNannySetupContext(name, acquire_gil)\ + if (acquire_gil) {\ + PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\ + PyGILState_Release(__pyx_gilstate_save);\ + } else {\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\ + } +#else + #define __Pyx_RefNannySetupContext(name, acquire_gil)\ + __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__) +#endif + #define __Pyx_RefNannyFinishContext()\ + __Pyx_RefNanny->FinishContext(&__pyx_refnanny) + #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__) + #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), 
__LINE__) + #define __Pyx_XINCREF(r) do { if((r) != NULL) {__Pyx_INCREF(r); }} while(0) + #define __Pyx_XDECREF(r) do { if((r) != NULL) {__Pyx_DECREF(r); }} while(0) + #define __Pyx_XGOTREF(r) do { if((r) != NULL) {__Pyx_GOTREF(r); }} while(0) + #define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);}} while(0) +#else + #define __Pyx_RefNannyDeclarations + #define __Pyx_RefNannySetupContext(name, acquire_gil) + #define __Pyx_RefNannyFinishContext() + #define __Pyx_INCREF(r) Py_INCREF(r) + #define __Pyx_DECREF(r) Py_DECREF(r) + #define __Pyx_GOTREF(r) + #define __Pyx_GIVEREF(r) + #define __Pyx_XINCREF(r) Py_XINCREF(r) + #define __Pyx_XDECREF(r) Py_XDECREF(r) + #define __Pyx_XGOTREF(r) + #define __Pyx_XGIVEREF(r) +#endif +#define __Pyx_XDECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; __Pyx_XDECREF(tmp);\ + } while (0) +#define __Pyx_DECREF_SET(r, v) do {\ + PyObject *tmp = (PyObject *) r;\ + r = v; __Pyx_DECREF(tmp);\ + } while (0) +#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0) +#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0) + +/* RaiseArgTupleInvalid.proto */ +static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact, + Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); + +/* RaiseDoubleKeywords.proto */ +static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name); + +/* ParseKeywords.proto */ +static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject **argnames[],\ + PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args,\ + const char* function_name); + +/* ArgTypeTest.proto */ +static CYTHON_INLINE int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, + const char *name, int exact); + +/* BufferFormatCheck.proto */ +static CYTHON_INLINE int __Pyx_GetBufferAndValidate(Py_buffer* buf, PyObject* obj, + __Pyx_TypeInfo* dtype, int flags, int nd, int cast, __Pyx_BufFmt_StackElem* stack); +static CYTHON_INLINE void __Pyx_SafeReleaseBuffer(Py_buffer* info); +static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts); +static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx, + __Pyx_BufFmt_StackElem* stack, + __Pyx_TypeInfo* type); // PROTO + +/* PyObjectGetAttrStr.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetAttrStr(PyObject* obj, PyObject* attr_name) { + PyTypeObject* tp = Py_TYPE(obj); + if (likely(tp->tp_getattro)) + return tp->tp_getattro(obj, attr_name); +#if PY_MAJOR_VERSION < 3 + if (likely(tp->tp_getattr)) + return tp->tp_getattr(obj, PyString_AS_STRING(attr_name)); +#endif + return PyObject_GetAttr(obj, attr_name); +} +#else +#define __Pyx_PyObject_GetAttrStr(o,n) PyObject_GetAttr(o,n) +#endif + +/* GetBuiltinName.proto */ +static PyObject *__Pyx_GetBuiltinName(PyObject *name); + +/* GetModuleGlobalName.proto */ +static CYTHON_INLINE PyObject *__Pyx_GetModuleGlobalName(PyObject *name); + +/* PyObjectCall.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw); +#else +#define __Pyx_PyObject_Call(func, arg, kw) PyObject_Call(func, arg, kw) +#endif + +/* ExtTypeTest.proto */ +static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type); + +/* PyObjectCallMethO.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject 
*arg); +#endif + +/* PyObjectCallOneArg.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); + +/* PyObjectCallNoArg.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func); +#else +#define __Pyx_PyObject_CallNoArg(func) __Pyx_PyObject_Call(func, __pyx_empty_tuple, NULL) +#endif + +/* BufferIndexError.proto */ +static void __Pyx_RaiseBufferIndexError(int axis); + +#define __Pyx_BufPtrStrided1d(type, buf, i0, s0) (type)((char*)buf + i0 * s0) +#define __Pyx_BufPtrStrided2d(type, buf, i0, s0, i1, s1) (type)((char*)buf + i0 * s0 + i1 * s1) +/* SliceObject.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyObject_GetSlice( + PyObject* obj, Py_ssize_t cstart, Py_ssize_t cstop, + PyObject** py_start, PyObject** py_stop, PyObject** py_slice, + int has_cstart, int has_cstop, int wraparound); + +/* BufferFallbackError.proto */ +static void __Pyx_RaiseBufferFallbackError(void); + +/* PyThreadStateGet.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_PyThreadState_declare PyThreadState *__pyx_tstate; +#define __Pyx_PyThreadState_assign __pyx_tstate = PyThreadState_GET(); +#else +#define __Pyx_PyThreadState_declare +#define __Pyx_PyThreadState_assign +#endif + +/* PyErrFetchRestore.proto */ +#if CYTHON_COMPILING_IN_CPYTHON +#define __Pyx_ErrRestoreWithState(type, value, tb) __Pyx_ErrRestoreInState(PyThreadState_GET(), type, value, tb) +#define __Pyx_ErrFetchWithState(type, value, tb) __Pyx_ErrFetchInState(PyThreadState_GET(), type, value, tb) +#define __Pyx_ErrRestore(type, value, tb) __Pyx_ErrRestoreInState(__pyx_tstate, type, value, tb) +#define __Pyx_ErrFetch(type, value, tb) __Pyx_ErrFetchInState(__pyx_tstate, type, value, tb) +static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb); +static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb); +#else +#define __Pyx_ErrRestoreWithState(type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetchWithState(type, value, tb) PyErr_Fetch(type, value, tb) +#define __Pyx_ErrRestore(type, value, tb) PyErr_Restore(type, value, tb) +#define __Pyx_ErrFetch(type, value, tb) PyErr_Fetch(type, value, tb) +#endif + +/* RaiseException.proto */ +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause); + +/* DictGetItem.proto */ +#if PY_MAJOR_VERSION >= 3 && !CYTHON_COMPILING_IN_PYPY +static PyObject *__Pyx_PyDict_GetItem(PyObject *d, PyObject* key) { + PyObject *value; + value = PyDict_GetItemWithError(d, key); + if (unlikely(!value)) { + if (!PyErr_Occurred()) { + PyObject* args = PyTuple_Pack(1, key); + if (likely(args)) + PyErr_SetObject(PyExc_KeyError, args); + Py_XDECREF(args); + } + return NULL; + } + Py_INCREF(value); + return value; +} +#else + #define __Pyx_PyDict_GetItem(d, key) PyObject_GetItem(d, key) +#endif + +/* RaiseTooManyValuesToUnpack.proto */ +static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected); + +/* RaiseNeedMoreValuesToUnpack.proto */ +static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index); + +/* RaiseNoneIterError.proto */ +static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void); + +/* Import.proto */ +static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level); + +/* CodeObjectCache.proto */ +typedef struct { + PyCodeObject* code_object; + int code_line; +} __Pyx_CodeObjectCacheEntry; +struct 
__Pyx_CodeObjectCache { + int count; + int max_count; + __Pyx_CodeObjectCacheEntry* entries; +}; +static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL}; +static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line); +static PyCodeObject *__pyx_find_code_object(int code_line); +static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object); + +/* AddTraceback.proto */ +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename); + +/* BufferStructDeclare.proto */ +typedef struct { + Py_ssize_t shape, strides, suboffsets; +} __Pyx_Buf_DimInfo; +typedef struct { + size_t refcount; + Py_buffer pybuffer; +} __Pyx_Buffer; +typedef struct { + __Pyx_Buffer *rcbuffer; + char *data; + __Pyx_Buf_DimInfo diminfo[8]; +} __Pyx_LocalBuf_ND; + +#if PY_MAJOR_VERSION < 3 + static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags); + static void __Pyx_ReleaseBuffer(Py_buffer *view); +#else + #define __Pyx_GetBuffer PyObject_GetBuffer + #define __Pyx_ReleaseBuffer PyBuffer_Release +#endif + + +/* None.proto */ +static Py_ssize_t __Pyx_zeros[] = {0, 0, 0, 0, 0, 0, 0, 0}; +static Py_ssize_t __Pyx_minusones[] = {-1, -1, -1, -1, -1, -1, -1, -1}; + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value); + +/* None.proto */ +#if CYTHON_CCOMPLEX + #ifdef __cplusplus + #define __Pyx_CREAL(z) ((z).real()) + #define __Pyx_CIMAG(z) ((z).imag()) + #else + #define __Pyx_CREAL(z) (__real__(z)) + #define __Pyx_CIMAG(z) (__imag__(z)) + #endif +#else + #define __Pyx_CREAL(z) ((z).real) + #define __Pyx_CIMAG(z) ((z).imag) +#endif +#if defined(__cplusplus) && CYTHON_CCOMPLEX && (defined(_WIN32) || defined(__clang__) || (defined(__GNUC__) && (__GNUC__ >= 5 || __GNUC__ == 4 && __GNUC_MINOR__ >= 4 )) || __cplusplus >= 201103) + #define __Pyx_SET_CREAL(z,x) ((z).real(x)) + #define __Pyx_SET_CIMAG(z,y) ((z).imag(y)) +#else + #define __Pyx_SET_CREAL(z,x) __Pyx_CREAL(z) = (x) + #define __Pyx_SET_CIMAG(z,y) __Pyx_CIMAG(z) = (y) +#endif + +/* None.proto */ +static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float, float); + +/* None.proto */ +#if CYTHON_CCOMPLEX + #define __Pyx_c_eqf(a, b) ((a)==(b)) + #define __Pyx_c_sumf(a, b) ((a)+(b)) + #define __Pyx_c_difff(a, b) ((a)-(b)) + #define __Pyx_c_prodf(a, b) ((a)*(b)) + #define __Pyx_c_quotf(a, b) ((a)/(b)) + #define __Pyx_c_negf(a) (-(a)) + #ifdef __cplusplus + #define __Pyx_c_is_zerof(z) ((z)==(float)0) + #define __Pyx_c_conjf(z) (::std::conj(z)) + #if 1 + #define __Pyx_c_absf(z) (::std::abs(z)) + #define __Pyx_c_powf(a, b) (::std::pow(a, b)) + #endif + #else + #define __Pyx_c_is_zerof(z) ((z)==0) + #define __Pyx_c_conjf(z) (conjf(z)) + #if 1 + #define __Pyx_c_absf(z) (cabsf(z)) + #define __Pyx_c_powf(a, b) (cpowf(a, b)) + #endif + #endif +#else + static CYTHON_INLINE int __Pyx_c_eqf(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_sumf(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_difff(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_prodf(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quotf(__pyx_t_float_complex, __pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_negf(__pyx_t_float_complex); + static CYTHON_INLINE int __Pyx_c_is_zerof(__pyx_t_float_complex); + static 
CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conjf(__pyx_t_float_complex); + #if 1 + static CYTHON_INLINE float __Pyx_c_absf(__pyx_t_float_complex); + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_powf(__pyx_t_float_complex, __pyx_t_float_complex); + #endif +#endif + +/* None.proto */ +static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double, double); + +/* None.proto */ +#if CYTHON_CCOMPLEX + #define __Pyx_c_eq(a, b) ((a)==(b)) + #define __Pyx_c_sum(a, b) ((a)+(b)) + #define __Pyx_c_diff(a, b) ((a)-(b)) + #define __Pyx_c_prod(a, b) ((a)*(b)) + #define __Pyx_c_quot(a, b) ((a)/(b)) + #define __Pyx_c_neg(a) (-(a)) + #ifdef __cplusplus + #define __Pyx_c_is_zero(z) ((z)==(double)0) + #define __Pyx_c_conj(z) (::std::conj(z)) + #if 1 + #define __Pyx_c_abs(z) (::std::abs(z)) + #define __Pyx_c_pow(a, b) (::std::pow(a, b)) + #endif + #else + #define __Pyx_c_is_zero(z) ((z)==0) + #define __Pyx_c_conj(z) (conj(z)) + #if 1 + #define __Pyx_c_abs(z) (cabs(z)) + #define __Pyx_c_pow(a, b) (cpow(a, b)) + #endif + #endif +#else + static CYTHON_INLINE int __Pyx_c_eq(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_sum(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_prod(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot(__pyx_t_double_complex, __pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg(__pyx_t_double_complex); + static CYTHON_INLINE int __Pyx_c_is_zero(__pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_conj(__pyx_t_double_complex); + #if 1 + static CYTHON_INLINE double __Pyx_c_abs(__pyx_t_double_complex); + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow(__pyx_t_double_complex, __pyx_t_double_complex); + #endif +#endif + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_enum__NPY_TYPES(enum NPY_TYPES value); + +/* CIntFromPy.proto */ +static CYTHON_INLINE npy_int32 __Pyx_PyInt_As_npy_int32(PyObject *); + +/* CIntFromPy.proto */ +static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *); + +/* CIntToPy.proto */ +static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value); + +/* CIntFromPy.proto */ +static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *); + +/* CheckBinaryVersion.proto */ +static int __Pyx_check_binary_version(void); + +/* PyIdentifierFromString.proto */ +#if !defined(__Pyx_PyIdentifier_FromString) +#if PY_MAJOR_VERSION < 3 + #define __Pyx_PyIdentifier_FromString(s) PyString_FromString(s) +#else + #define __Pyx_PyIdentifier_FromString(s) PyUnicode_FromString(s) +#endif +#endif + +/* ModuleImport.proto */ +static PyObject *__Pyx_ImportModule(const char *name); + +/* TypeImport.proto */ +static PyTypeObject *__Pyx_ImportType(const char *module_name, const char *class_name, size_t size, int strict); + +/* InitStrings.proto */ +static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); + + +/* Module declarations from 'cpython.buffer' */ + +/* Module declarations from 'libc.string' */ + +/* Module declarations from 'libc.stdio' */ + +/* Module declarations from '__builtin__' */ + +/* Module declarations from 'cpython.type' */ +static PyTypeObject *__pyx_ptype_7cpython_4type_type = 0; + +/* Module declarations from 'cpython' */ + +/* Module declarations from 'cpython.object' */ + +/* Module 
declarations from 'cpython.ref' */ + +/* Module declarations from 'libc.stdlib' */ + +/* Module declarations from 'numpy' */ + +/* Module declarations from 'numpy' */ +static PyTypeObject *__pyx_ptype_5numpy_dtype = 0; +static PyTypeObject *__pyx_ptype_5numpy_flatiter = 0; +static PyTypeObject *__pyx_ptype_5numpy_broadcast = 0; +static PyTypeObject *__pyx_ptype_5numpy_ndarray = 0; +static PyTypeObject *__pyx_ptype_5numpy_ufunc = 0; +static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *, char *, char *, int *); /*proto*/ + +/* Module declarations from 'nms.gpu_nms' */ +static __Pyx_TypeInfo __Pyx_TypeInfo_nn___pyx_t_5numpy_float32_t = { "float32_t", NULL, sizeof(__pyx_t_5numpy_float32_t), { 0 }, 0, 'R', 0, 0 }; +static __Pyx_TypeInfo __Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t = { "int32_t", NULL, sizeof(__pyx_t_5numpy_int32_t), { 0 }, 0, IS_UNSIGNED(__pyx_t_5numpy_int32_t) ? 'U' : 'I', IS_UNSIGNED(__pyx_t_5numpy_int32_t), 0 }; +static __Pyx_TypeInfo __Pyx_TypeInfo_nn___pyx_t_5numpy_intp_t = { "intp_t", NULL, sizeof(__pyx_t_5numpy_intp_t), { 0 }, 0, IS_UNSIGNED(__pyx_t_5numpy_intp_t) ? 'U' : 'I', IS_UNSIGNED(__pyx_t_5numpy_intp_t), 0 }; +#define __Pyx_MODULE_NAME "nms.gpu_nms" +int __pyx_module_is_main_nms__gpu_nms = 0; + +/* Implementation of 'nms.gpu_nms' */ +static PyObject *__pyx_builtin_ValueError; +static PyObject *__pyx_builtin_range; +static PyObject *__pyx_builtin_RuntimeError; +static const char __pyx_k_np[] = "np"; +static const char __pyx_k_dets[] = "dets"; +static const char __pyx_k_keep[] = "keep"; +static const char __pyx_k_main[] = "__main__"; +static const char __pyx_k_test[] = "__test__"; +static const char __pyx_k_dtype[] = "dtype"; +static const char __pyx_k_int32[] = "int32"; +static const char __pyx_k_numpy[] = "numpy"; +static const char __pyx_k_order[] = "order"; +static const char __pyx_k_range[] = "range"; +static const char __pyx_k_zeros[] = "zeros"; +static const char __pyx_k_import[] = "__import__"; +static const char __pyx_k_scores[] = "scores"; +static const char __pyx_k_thresh[] = "thresh"; +static const char __pyx_k_argsort[] = "argsort"; +static const char __pyx_k_gpu_nms[] = "gpu_nms"; +static const char __pyx_k_num_out[] = "num_out"; +static const char __pyx_k_boxes_dim[] = "boxes_dim"; +static const char __pyx_k_boxes_num[] = "boxes_num"; +static const char __pyx_k_device_id[] = "device_id"; +static const char __pyx_k_ValueError[] = "ValueError"; +static const char __pyx_k_nms_gpu_nms[] = "nms.gpu_nms"; +static const char __pyx_k_sorted_dets[] = "sorted_dets"; +static const char __pyx_k_RuntimeError[] = "RuntimeError"; +static const char __pyx_k_ndarray_is_not_C_contiguous[] = "ndarray is not C contiguous"; +static const char __pyx_k_unknown_dtype_code_in_numpy_pxd[] = "unknown dtype code in numpy.pxd (%d)"; +static const char __pyx_k_D_v_zix_caffe_caffe_win_20160523[] = "D:\\v-zix\\caffe\\caffe-win-20160523\\models\\py-faster-rcnn-windows\\lib\\nms\\gpu_nms.pyx"; +static const char __pyx_k_Format_string_allocated_too_shor[] = "Format string allocated too short, see comment in numpy.pxd"; +static const char __pyx_k_Non_native_byte_order_not_suppor[] = "Non-native byte order not supported"; +static const char __pyx_k_ndarray_is_not_Fortran_contiguou[] = "ndarray is not Fortran contiguous"; +static const char __pyx_k_Format_string_allocated_too_shor_2[] = "Format string allocated too short."; +static PyObject *__pyx_kp_s_D_v_zix_caffe_caffe_win_20160523; +static PyObject *__pyx_kp_u_Format_string_allocated_too_shor; +static PyObject 
*__pyx_kp_u_Format_string_allocated_too_shor_2; +static PyObject *__pyx_kp_u_Non_native_byte_order_not_suppor; +static PyObject *__pyx_n_s_RuntimeError; +static PyObject *__pyx_n_s_ValueError; +static PyObject *__pyx_n_s_argsort; +static PyObject *__pyx_n_s_boxes_dim; +static PyObject *__pyx_n_s_boxes_num; +static PyObject *__pyx_n_s_dets; +static PyObject *__pyx_n_s_device_id; +static PyObject *__pyx_n_s_dtype; +static PyObject *__pyx_n_s_gpu_nms; +static PyObject *__pyx_n_s_import; +static PyObject *__pyx_n_s_int32; +static PyObject *__pyx_n_s_keep; +static PyObject *__pyx_n_s_main; +static PyObject *__pyx_kp_u_ndarray_is_not_C_contiguous; +static PyObject *__pyx_kp_u_ndarray_is_not_Fortran_contiguou; +static PyObject *__pyx_n_s_nms_gpu_nms; +static PyObject *__pyx_n_s_np; +static PyObject *__pyx_n_s_num_out; +static PyObject *__pyx_n_s_numpy; +static PyObject *__pyx_n_s_order; +static PyObject *__pyx_n_s_range; +static PyObject *__pyx_n_s_scores; +static PyObject *__pyx_n_s_sorted_dets; +static PyObject *__pyx_n_s_test; +static PyObject *__pyx_n_s_thresh; +static PyObject *__pyx_kp_u_unknown_dtype_code_in_numpy_pxd; +static PyObject *__pyx_n_s_zeros; +static PyObject *__pyx_pf_3nms_7gpu_nms_gpu_nms(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_dets, PyObject *__pyx_v_thresh, __pyx_t_5numpy_int32_t __pyx_v_device_id); /* proto */ +static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /* proto */ +static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info); /* proto */ +static PyObject *__pyx_int_4; +static PyObject *__pyx_int_neg_1; +static PyObject *__pyx_slice_; +static PyObject *__pyx_slice__3; +static PyObject *__pyx_slice__4; +static PyObject *__pyx_tuple__2; +static PyObject *__pyx_tuple__5; +static PyObject *__pyx_tuple__6; +static PyObject *__pyx_tuple__7; +static PyObject *__pyx_tuple__8; +static PyObject *__pyx_tuple__9; +static PyObject *__pyx_tuple__10; +static PyObject *__pyx_tuple__11; +static PyObject *__pyx_codeobj__12; + +/* "nms/gpu_nms.pyx":16 + * void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) + * + * def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, # <<<<<<<<<<<<<< + * np.int32_t device_id=0): + * cdef int boxes_num = dets.shape[0] + */ + +/* Python wrapper */ +static PyObject *__pyx_pw_3nms_7gpu_nms_1gpu_nms(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/ +static PyMethodDef __pyx_mdef_3nms_7gpu_nms_1gpu_nms = {"gpu_nms", (PyCFunction)__pyx_pw_3nms_7gpu_nms_1gpu_nms, METH_VARARGS|METH_KEYWORDS, 0}; +static PyObject *__pyx_pw_3nms_7gpu_nms_1gpu_nms(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) { + PyArrayObject *__pyx_v_dets = 0; + PyObject *__pyx_v_thresh = 0; + __pyx_t_5numpy_int32_t __pyx_v_device_id; + PyObject *__pyx_r = 0; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("gpu_nms (wrapper)", 0); + { + static PyObject **__pyx_pyargnames[] = {&__pyx_n_s_dets,&__pyx_n_s_thresh,&__pyx_n_s_device_id,0}; + PyObject* values[3] = {0,0,0}; + if (unlikely(__pyx_kwds)) { + Py_ssize_t kw_args; + const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args); + switch (pos_args) { + case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + case 0: break; + default: goto __pyx_L5_argtuple_error; + } + kw_args = PyDict_Size(__pyx_kwds); + switch 
(pos_args) { + case 0: + if (likely((values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_dets)) != 0)) kw_args--; + else goto __pyx_L5_argtuple_error; + case 1: + if (likely((values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s_thresh)) != 0)) kw_args--; + else { + __Pyx_RaiseArgtupleInvalid("gpu_nms", 0, 2, 3, 1); __PYX_ERR(0, 16, __pyx_L3_error) + } + case 2: + if (kw_args > 0) { + PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s_device_id); + if (value) { values[2] = value; kw_args--; } + } + } + if (unlikely(kw_args > 0)) { + if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "gpu_nms") < 0)) __PYX_ERR(0, 16, __pyx_L3_error) + } + } else { + switch (PyTuple_GET_SIZE(__pyx_args)) { + case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2); + case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1); + values[0] = PyTuple_GET_ITEM(__pyx_args, 0); + break; + default: goto __pyx_L5_argtuple_error; + } + } + __pyx_v_dets = ((PyArrayObject *)values[0]); + __pyx_v_thresh = ((PyObject*)values[1]); + if (values[2]) { + __pyx_v_device_id = __Pyx_PyInt_As_npy_int32(values[2]); if (unlikely((__pyx_v_device_id == (npy_int32)-1) && PyErr_Occurred())) __PYX_ERR(0, 17, __pyx_L3_error) + } else { + __pyx_v_device_id = ((__pyx_t_5numpy_int32_t)0); + } + } + goto __pyx_L4_argument_unpacking_done; + __pyx_L5_argtuple_error:; + __Pyx_RaiseArgtupleInvalid("gpu_nms", 0, 2, 3, PyTuple_GET_SIZE(__pyx_args)); __PYX_ERR(0, 16, __pyx_L3_error) + __pyx_L3_error:; + __Pyx_AddTraceback("nms.gpu_nms.gpu_nms", __pyx_clineno, __pyx_lineno, __pyx_filename); + __Pyx_RefNannyFinishContext(); + return NULL; + __pyx_L4_argument_unpacking_done:; + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_dets), __pyx_ptype_5numpy_ndarray, 1, "dets", 0))) __PYX_ERR(0, 16, __pyx_L1_error) + if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_thresh), (&PyFloat_Type), 1, "thresh", 1))) __PYX_ERR(0, 16, __pyx_L1_error) + __pyx_r = __pyx_pf_3nms_7gpu_nms_gpu_nms(__pyx_self, __pyx_v_dets, __pyx_v_thresh, __pyx_v_device_id); + + /* function exit code */ + goto __pyx_L0; + __pyx_L1_error:; + __pyx_r = NULL; + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyObject *__pyx_pf_3nms_7gpu_nms_gpu_nms(CYTHON_UNUSED PyObject *__pyx_self, PyArrayObject *__pyx_v_dets, PyObject *__pyx_v_thresh, __pyx_t_5numpy_int32_t __pyx_v_device_id) { + int __pyx_v_boxes_num; + int __pyx_v_boxes_dim; + int __pyx_v_num_out; + PyArrayObject *__pyx_v_keep = 0; + PyArrayObject *__pyx_v_scores = 0; + PyArrayObject *__pyx_v_order = 0; + PyArrayObject *__pyx_v_sorted_dets = 0; + __Pyx_LocalBuf_ND __pyx_pybuffernd_dets; + __Pyx_Buffer __pyx_pybuffer_dets; + __Pyx_LocalBuf_ND __pyx_pybuffernd_keep; + __Pyx_Buffer __pyx_pybuffer_keep; + __Pyx_LocalBuf_ND __pyx_pybuffernd_order; + __Pyx_Buffer __pyx_pybuffer_order; + __Pyx_LocalBuf_ND __pyx_pybuffernd_scores; + __Pyx_Buffer __pyx_pybuffer_scores; + __Pyx_LocalBuf_ND __pyx_pybuffernd_sorted_dets; + __Pyx_Buffer __pyx_pybuffer_sorted_dets; + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + PyObject *__pyx_t_2 = NULL; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + PyObject *__pyx_t_5 = NULL; + PyArrayObject *__pyx_t_6 = NULL; + PyArrayObject *__pyx_t_7 = NULL; + PyArrayObject *__pyx_t_8 = NULL; + PyArrayObject *__pyx_t_9 = NULL; + Py_ssize_t __pyx_t_10; + int __pyx_t_11; + Py_ssize_t __pyx_t_12; + Py_ssize_t __pyx_t_13; + float __pyx_t_14; + PyObject *__pyx_t_15 = NULL; + PyObject *__pyx_t_16 = NULL; + PyObject 
*__pyx_t_17 = NULL; + __Pyx_RefNannySetupContext("gpu_nms", 0); + __pyx_pybuffer_keep.pybuffer.buf = NULL; + __pyx_pybuffer_keep.refcount = 0; + __pyx_pybuffernd_keep.data = NULL; + __pyx_pybuffernd_keep.rcbuffer = &__pyx_pybuffer_keep; + __pyx_pybuffer_scores.pybuffer.buf = NULL; + __pyx_pybuffer_scores.refcount = 0; + __pyx_pybuffernd_scores.data = NULL; + __pyx_pybuffernd_scores.rcbuffer = &__pyx_pybuffer_scores; + __pyx_pybuffer_order.pybuffer.buf = NULL; + __pyx_pybuffer_order.refcount = 0; + __pyx_pybuffernd_order.data = NULL; + __pyx_pybuffernd_order.rcbuffer = &__pyx_pybuffer_order; + __pyx_pybuffer_sorted_dets.pybuffer.buf = NULL; + __pyx_pybuffer_sorted_dets.refcount = 0; + __pyx_pybuffernd_sorted_dets.data = NULL; + __pyx_pybuffernd_sorted_dets.rcbuffer = &__pyx_pybuffer_sorted_dets; + __pyx_pybuffer_dets.pybuffer.buf = NULL; + __pyx_pybuffer_dets.refcount = 0; + __pyx_pybuffernd_dets.data = NULL; + __pyx_pybuffernd_dets.rcbuffer = &__pyx_pybuffer_dets; + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_dets.rcbuffer->pybuffer, (PyObject*)__pyx_v_dets, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float32_t, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack) == -1)) __PYX_ERR(0, 16, __pyx_L1_error) + } + __pyx_pybuffernd_dets.diminfo[0].strides = __pyx_pybuffernd_dets.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_dets.diminfo[0].shape = __pyx_pybuffernd_dets.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_dets.diminfo[1].strides = __pyx_pybuffernd_dets.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_dets.diminfo[1].shape = __pyx_pybuffernd_dets.rcbuffer->pybuffer.shape[1]; + + /* "nms/gpu_nms.pyx":18 + * def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, + * np.int32_t device_id=0): + * cdef int boxes_num = dets.shape[0] # <<<<<<<<<<<<<< + * cdef int boxes_dim = dets.shape[1] + * cdef int num_out + */ + __pyx_v_boxes_num = (__pyx_v_dets->dimensions[0]); + + /* "nms/gpu_nms.pyx":19 + * np.int32_t device_id=0): + * cdef int boxes_num = dets.shape[0] + * cdef int boxes_dim = dets.shape[1] # <<<<<<<<<<<<<< + * cdef int num_out + * cdef np.ndarray[np.int32_t, ndim=1] \ + */ + __pyx_v_boxes_dim = (__pyx_v_dets->dimensions[1]); + + /* "nms/gpu_nms.pyx":22 + * cdef int num_out + * cdef np.ndarray[np.int32_t, ndim=1] \ + * keep = np.zeros(boxes_num, dtype=np.int32) # <<<<<<<<<<<<<< + * cdef np.ndarray[np.float32_t, ndim=1] \ + * scores = dets[:, 4] + */ + __pyx_t_1 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 22, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_2 = __Pyx_PyObject_GetAttrStr(__pyx_t_1, __pyx_n_s_zeros); if (unlikely(!__pyx_t_2)) __PYX_ERR(0, 22, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_2); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = __Pyx_PyInt_From_int(__pyx_v_boxes_num); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 22, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(0, 22, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_GIVEREF(__pyx_t_1); + PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_1); + __pyx_t_1 = 0; + __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 22, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_4 = __Pyx_GetModuleGlobalName(__pyx_n_s_np); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 22, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_5 = __Pyx_PyObject_GetAttrStr(__pyx_t_4, __pyx_n_s_int32); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 22, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + 
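+ /* Generated call sequence for the quoted source line 22: at this point __pyx_t_2 holds np.zeros, __pyx_t_3 the one-element argument tuple (boxes_num,), __pyx_t_1 a fresh kwargs dict, and __pyx_t_5 the np.int32 attribute; the dict is populated with dtype=np.int32 just below before np.zeros is invoked and its result is re-validated as an ndarray buffer for keep. */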
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (PyDict_SetItem(__pyx_t_1, __pyx_n_s_dtype, __pyx_t_5) < 0) __PYX_ERR(0, 22, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_t_5 = __Pyx_PyObject_Call(__pyx_t_2, __pyx_t_3, __pyx_t_1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 22, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0; + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + if (!(likely(((__pyx_t_5) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_5, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 22, __pyx_L1_error) + __pyx_t_6 = ((PyArrayObject *)__pyx_t_5); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_keep.rcbuffer->pybuffer, (PyObject*)__pyx_t_6, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) { + __pyx_v_keep = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_keep.rcbuffer->pybuffer.buf = NULL; + __PYX_ERR(0, 21, __pyx_L1_error) + } else {__pyx_pybuffernd_keep.diminfo[0].strides = __pyx_pybuffernd_keep.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_keep.diminfo[0].shape = __pyx_pybuffernd_keep.rcbuffer->pybuffer.shape[0]; + } + } + __pyx_t_6 = 0; + __pyx_v_keep = ((PyArrayObject *)__pyx_t_5); + __pyx_t_5 = 0; + + /* "nms/gpu_nms.pyx":24 + * keep = np.zeros(boxes_num, dtype=np.int32) + * cdef np.ndarray[np.float32_t, ndim=1] \ + * scores = dets[:, 4] # <<<<<<<<<<<<<< + * #cdef np.ndarray[np.int_t, ndim=1] \ // 20160601, by xzn + * # order = scores.argsort()[::-1] + */ + __pyx_t_5 = PyObject_GetItem(((PyObject *)__pyx_v_dets), __pyx_tuple__2); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 24, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + if (!(likely(((__pyx_t_5) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_5, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 24, __pyx_L1_error) + __pyx_t_7 = ((PyArrayObject *)__pyx_t_5); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_scores.rcbuffer->pybuffer, (PyObject*)__pyx_t_7, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float32_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) { + __pyx_v_scores = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_scores.rcbuffer->pybuffer.buf = NULL; + __PYX_ERR(0, 23, __pyx_L1_error) + } else {__pyx_pybuffernd_scores.diminfo[0].strides = __pyx_pybuffernd_scores.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_scores.diminfo[0].shape = __pyx_pybuffernd_scores.rcbuffer->pybuffer.shape[0]; + } + } + __pyx_t_7 = 0; + __pyx_v_scores = ((PyArrayObject *)__pyx_t_5); + __pyx_t_5 = 0; + + /* "nms/gpu_nms.pyx":28 + * # order = scores.argsort()[::-1] + * cdef np.ndarray[np.intp_t, ndim=1] \ + * order = scores.argsort()[::-1] # <<<<<<<<<<<<<< + * cdef np.ndarray[np.float32_t, ndim=2] \ + * sorted_dets = dets[order, :] + */ + __pyx_t_1 = __Pyx_PyObject_GetAttrStr(((PyObject *)__pyx_v_scores), __pyx_n_s_argsort); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 28, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_t_3 = NULL; + if (CYTHON_COMPILING_IN_CPYTHON && likely(PyMethod_Check(__pyx_t_1))) { + __pyx_t_3 = PyMethod_GET_SELF(__pyx_t_1); + if (likely(__pyx_t_3)) { + PyObject* function = PyMethod_GET_FUNCTION(__pyx_t_1); + __Pyx_INCREF(__pyx_t_3); + __Pyx_INCREF(function); + __Pyx_DECREF_SET(__pyx_t_1, function); + } + } + if (__pyx_t_3) { + __pyx_t_5 = __Pyx_PyObject_CallOneArg(__pyx_t_1, __pyx_t_3); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 28, __pyx_L1_error) + 
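+ /* Bound-method fast path for scores.argsort(): when argsort is fetched as a PyMethod, the generated code re-calls the underlying function with self as its single argument instead of packing an args tuple; the prebuilt __pyx_slice__3 applied just below corresponds to the [::-1] in the quoted source, reversing argsort's ascending result so that order holds indices by descending score. */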
__Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + } else { + __pyx_t_5 = __Pyx_PyObject_CallNoArg(__pyx_t_1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 28, __pyx_L1_error) + } + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + __pyx_t_1 = PyObject_GetItem(__pyx_t_5, __pyx_slice__3); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 28, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + if (!(likely(((__pyx_t_1) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_1, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 28, __pyx_L1_error) + __pyx_t_8 = ((PyArrayObject *)__pyx_t_1); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_order.rcbuffer->pybuffer, (PyObject*)__pyx_t_8, &__Pyx_TypeInfo_nn___pyx_t_5numpy_intp_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) { + __pyx_v_order = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_order.rcbuffer->pybuffer.buf = NULL; + __PYX_ERR(0, 27, __pyx_L1_error) + } else {__pyx_pybuffernd_order.diminfo[0].strides = __pyx_pybuffernd_order.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_order.diminfo[0].shape = __pyx_pybuffernd_order.rcbuffer->pybuffer.shape[0]; + } + } + __pyx_t_8 = 0; + __pyx_v_order = ((PyArrayObject *)__pyx_t_1); + __pyx_t_1 = 0; + + /* "nms/gpu_nms.pyx":30 + * order = scores.argsort()[::-1] + * cdef np.ndarray[np.float32_t, ndim=2] \ + * sorted_dets = dets[order, :] # <<<<<<<<<<<<<< + * _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) + * keep = keep[:num_out] + */ + __pyx_t_1 = PyTuple_New(2); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 30, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_INCREF(((PyObject *)__pyx_v_order)); + __Pyx_GIVEREF(((PyObject *)__pyx_v_order)); + PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_v_order)); + __Pyx_INCREF(__pyx_slice__4); + __Pyx_GIVEREF(__pyx_slice__4); + PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_slice__4); + __pyx_t_5 = PyObject_GetItem(((PyObject *)__pyx_v_dets), __pyx_t_1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 30, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + if (!(likely(((__pyx_t_5) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_5, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 30, __pyx_L1_error) + __pyx_t_9 = ((PyArrayObject *)__pyx_t_5); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_sorted_dets.rcbuffer->pybuffer, (PyObject*)__pyx_t_9, &__Pyx_TypeInfo_nn___pyx_t_5numpy_float32_t, PyBUF_FORMAT| PyBUF_STRIDES, 2, 0, __pyx_stack) == -1)) { + __pyx_v_sorted_dets = ((PyArrayObject *)Py_None); __Pyx_INCREF(Py_None); __pyx_pybuffernd_sorted_dets.rcbuffer->pybuffer.buf = NULL; + __PYX_ERR(0, 29, __pyx_L1_error) + } else {__pyx_pybuffernd_sorted_dets.diminfo[0].strides = __pyx_pybuffernd_sorted_dets.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_sorted_dets.diminfo[0].shape = __pyx_pybuffernd_sorted_dets.rcbuffer->pybuffer.shape[0]; __pyx_pybuffernd_sorted_dets.diminfo[1].strides = __pyx_pybuffernd_sorted_dets.rcbuffer->pybuffer.strides[1]; __pyx_pybuffernd_sorted_dets.diminfo[1].shape = __pyx_pybuffernd_sorted_dets.rcbuffer->pybuffer.shape[1]; + } + } + __pyx_t_9 = 0; + __pyx_v_sorted_dets = ((PyArrayObject *)__pyx_t_5); + __pyx_t_5 = 0; + + /* "nms/gpu_nms.pyx":31 + * cdef np.ndarray[np.float32_t, ndim=2] \ + * sorted_dets = dets[order, :] + * _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) # <<<<<<<<<<<<<< + * keep = 
keep[:num_out] + * return list(order[keep]) + */ + __pyx_t_10 = 0; + __pyx_t_11 = -1; + if (__pyx_t_10 < 0) { + __pyx_t_10 += __pyx_pybuffernd_keep.diminfo[0].shape; + if (unlikely(__pyx_t_10 < 0)) __pyx_t_11 = 0; + } else if (unlikely(__pyx_t_10 >= __pyx_pybuffernd_keep.diminfo[0].shape)) __pyx_t_11 = 0; + if (unlikely(__pyx_t_11 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_11); + __PYX_ERR(0, 31, __pyx_L1_error) + } + __pyx_t_12 = 0; + __pyx_t_13 = 0; + __pyx_t_11 = -1; + if (__pyx_t_12 < 0) { + __pyx_t_12 += __pyx_pybuffernd_sorted_dets.diminfo[0].shape; + if (unlikely(__pyx_t_12 < 0)) __pyx_t_11 = 0; + } else if (unlikely(__pyx_t_12 >= __pyx_pybuffernd_sorted_dets.diminfo[0].shape)) __pyx_t_11 = 0; + if (__pyx_t_13 < 0) { + __pyx_t_13 += __pyx_pybuffernd_sorted_dets.diminfo[1].shape; + if (unlikely(__pyx_t_13 < 0)) __pyx_t_11 = 1; + } else if (unlikely(__pyx_t_13 >= __pyx_pybuffernd_sorted_dets.diminfo[1].shape)) __pyx_t_11 = 1; + if (unlikely(__pyx_t_11 != -1)) { + __Pyx_RaiseBufferIndexError(__pyx_t_11); + __PYX_ERR(0, 31, __pyx_L1_error) + } + __pyx_t_14 = __pyx_PyFloat_AsFloat(__pyx_v_thresh); if (unlikely((__pyx_t_14 == (float)-1) && PyErr_Occurred())) __PYX_ERR(0, 31, __pyx_L1_error) + _nms((&(*__Pyx_BufPtrStrided1d(__pyx_t_5numpy_int32_t *, __pyx_pybuffernd_keep.rcbuffer->pybuffer.buf, __pyx_t_10, __pyx_pybuffernd_keep.diminfo[0].strides))), (&__pyx_v_num_out), (&(*__Pyx_BufPtrStrided2d(__pyx_t_5numpy_float32_t *, __pyx_pybuffernd_sorted_dets.rcbuffer->pybuffer.buf, __pyx_t_12, __pyx_pybuffernd_sorted_dets.diminfo[0].strides, __pyx_t_13, __pyx_pybuffernd_sorted_dets.diminfo[1].strides))), __pyx_v_boxes_num, __pyx_v_boxes_dim, __pyx_t_14, __pyx_v_device_id); + + /* "nms/gpu_nms.pyx":32 + * sorted_dets = dets[order, :] + * _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) + * keep = keep[:num_out] # <<<<<<<<<<<<<< + * return list(order[keep]) + */ + __pyx_t_5 = __Pyx_PyObject_GetSlice(((PyObject *)__pyx_v_keep), 0, __pyx_v_num_out, NULL, NULL, NULL, 0, 1, 1); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 32, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + if (!(likely(((__pyx_t_5) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_5, __pyx_ptype_5numpy_ndarray))))) __PYX_ERR(0, 32, __pyx_L1_error) + __pyx_t_6 = ((PyArrayObject *)__pyx_t_5); + { + __Pyx_BufFmt_StackElem __pyx_stack[1]; + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_keep.rcbuffer->pybuffer); + __pyx_t_11 = __Pyx_GetBufferAndValidate(&__pyx_pybuffernd_keep.rcbuffer->pybuffer, (PyObject*)__pyx_t_6, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack); + if (unlikely(__pyx_t_11 < 0)) { + PyErr_Fetch(&__pyx_t_15, &__pyx_t_16, &__pyx_t_17); + if (unlikely(__Pyx_GetBufferAndValidate(&__pyx_pybuffernd_keep.rcbuffer->pybuffer, (PyObject*)__pyx_v_keep, &__Pyx_TypeInfo_nn___pyx_t_5numpy_int32_t, PyBUF_FORMAT| PyBUF_STRIDES, 1, 0, __pyx_stack) == -1)) { + Py_XDECREF(__pyx_t_15); Py_XDECREF(__pyx_t_16); Py_XDECREF(__pyx_t_17); + __Pyx_RaiseBufferFallbackError(); + } else { + PyErr_Restore(__pyx_t_15, __pyx_t_16, __pyx_t_17); + } + } + __pyx_pybuffernd_keep.diminfo[0].strides = __pyx_pybuffernd_keep.rcbuffer->pybuffer.strides[0]; __pyx_pybuffernd_keep.diminfo[0].shape = __pyx_pybuffernd_keep.rcbuffer->pybuffer.shape[0]; + if (unlikely(__pyx_t_11 < 0)) __PYX_ERR(0, 32, __pyx_L1_error) + } + __pyx_t_6 = 0; + __Pyx_DECREF_SET(__pyx_v_keep, ((PyArrayObject *)__pyx_t_5)); + __pyx_t_5 = 0; + + /* "nms/gpu_nms.pyx":33 + * _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, 
boxes_dim, thresh, device_id) + * keep = keep[:num_out] + * return list(order[keep]) # <<<<<<<<<<<<<< + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_5 = PyObject_GetItem(((PyObject *)__pyx_v_order), ((PyObject *)__pyx_v_keep)); if (unlikely(!__pyx_t_5)) __PYX_ERR(0, 33, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_5); + __pyx_t_1 = PySequence_List(__pyx_t_5); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 33, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0; + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "nms/gpu_nms.pyx":16 + * void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) + * + * def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, # <<<<<<<<<<<<<< + * np.int32_t device_id=0): + * cdef int boxes_num = dets.shape[0] + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_2); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_XDECREF(__pyx_t_5); + { PyObject *__pyx_type, *__pyx_value, *__pyx_tb; + __Pyx_PyThreadState_declare + __Pyx_PyThreadState_assign + __Pyx_ErrFetch(&__pyx_type, &__pyx_value, &__pyx_tb); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_dets.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_keep.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_order.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_scores.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sorted_dets.rcbuffer->pybuffer); + __Pyx_ErrRestore(__pyx_type, __pyx_value, __pyx_tb);} + __Pyx_AddTraceback("nms.gpu_nms.gpu_nms", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + goto __pyx_L2; + __pyx_L0:; + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_dets.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_keep.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_order.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_scores.rcbuffer->pybuffer); + __Pyx_SafeReleaseBuffer(&__pyx_pybuffernd_sorted_dets.rcbuffer->pybuffer); + __pyx_L2:; + __Pyx_XDECREF((PyObject *)__pyx_v_keep); + __Pyx_XDECREF((PyObject *)__pyx_v_scores); + __Pyx_XDECREF((PyObject *)__pyx_v_order); + __Pyx_XDECREF((PyObject *)__pyx_v_sorted_dets); + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":197 + * # experimental exception made for __getbuffer__ and __releasebuffer__ + * # -- the details of this may change. + * def __getbuffer__(ndarray self, Py_buffer* info, int flags): # <<<<<<<<<<<<<< + * # This implementation of getbuffer is geared towards Cython + * # requirements, and does not yet fullfill the PEP. 
+ */ + +/* Python wrapper */ +static CYTHON_UNUSED int __pyx_pw_5numpy_7ndarray_1__getbuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags); /*proto*/ +static CYTHON_UNUSED int __pyx_pw_5numpy_7ndarray_1__getbuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags) { + int __pyx_r; + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__getbuffer__ (wrapper)", 0); + __pyx_r = __pyx_pf_5numpy_7ndarray___getbuffer__(((PyArrayObject *)__pyx_v_self), ((Py_buffer *)__pyx_v_info), ((int)__pyx_v_flags)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static int __pyx_pf_5numpy_7ndarray___getbuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info, int __pyx_v_flags) { + int __pyx_v_copy_shape; + int __pyx_v_i; + int __pyx_v_ndim; + int __pyx_v_endian_detector; + int __pyx_v_little_endian; + int __pyx_v_t; + char *__pyx_v_f; + PyArray_Descr *__pyx_v_descr = 0; + int __pyx_v_offset; + int __pyx_v_hasfields; + int __pyx_r; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + int __pyx_t_2; + PyObject *__pyx_t_3 = NULL; + int __pyx_t_4; + int __pyx_t_5; + PyObject *__pyx_t_6 = NULL; + char *__pyx_t_7; + __Pyx_RefNannySetupContext("__getbuffer__", 0); + if (__pyx_v_info != NULL) { + __pyx_v_info->obj = Py_None; __Pyx_INCREF(Py_None); + __Pyx_GIVEREF(__pyx_v_info->obj); + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":203 + * # of flags + * + * if info == NULL: return # <<<<<<<<<<<<<< + * + * cdef int copy_shape, i, ndim + */ + __pyx_t_1 = ((__pyx_v_info == NULL) != 0); + if (__pyx_t_1) { + __pyx_r = 0; + goto __pyx_L0; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":206 + * + * cdef int copy_shape, i, ndim + * cdef int endian_detector = 1 # <<<<<<<<<<<<<< + * cdef bint little_endian = ((&endian_detector)[0] != 0) + * + */ + __pyx_v_endian_detector = 1; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":207 + * cdef int copy_shape, i, ndim + * cdef int endian_detector = 1 + * cdef bint little_endian = ((&endian_detector)[0] != 0) # <<<<<<<<<<<<<< + * + * ndim = PyArray_NDIM(self) + */ + __pyx_v_little_endian = ((((char *)(&__pyx_v_endian_detector))[0]) != 0); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":209 + * cdef bint little_endian = ((&endian_detector)[0] != 0) + * + * ndim = PyArray_NDIM(self) # <<<<<<<<<<<<<< + * + * if sizeof(npy_intp) != sizeof(Py_ssize_t): + */ + __pyx_v_ndim = PyArray_NDIM(__pyx_v_self); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":211 + * ndim = PyArray_NDIM(self) + * + * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<< + * copy_shape = 1 + * else: + */ + __pyx_t_1 = (((sizeof(npy_intp)) != (sizeof(Py_ssize_t))) != 0); + if (__pyx_t_1) { + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":212 + * + * if sizeof(npy_intp) != sizeof(Py_ssize_t): + * copy_shape = 1 # <<<<<<<<<<<<<< + * else: + * copy_shape = 0 + */ + __pyx_v_copy_shape = 1; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":211 + * ndim = PyArray_NDIM(self) + * + * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<< + * copy_shape = 1 + * else: + */ + goto __pyx_L4; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":214 + * copy_shape = 1 + * else: + * copy_shape = 0 # <<<<<<<<<<<<<< + * + * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) + */ + /*else*/ { + 
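+ /* Reached when sizeof(npy_intp) == sizeof(Py_ssize_t): copy_shape stays 0 and __getbuffer__ later aliases PyArray_STRIDES/PyArray_DIMS directly; in the copy_shape=1 case the strides and shape are instead copied into a single malloc'd Py_ssize_t block further below, so the buffer's Py_ssize_t-typed fields remain valid when the two integer widths differ. */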
__pyx_v_copy_shape = 0; + } + __pyx_L4:; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":216 + * copy_shape = 0 + * + * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) # <<<<<<<<<<<<<< + * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): + * raise ValueError(u"ndarray is not C contiguous") + */ + __pyx_t_2 = (((__pyx_v_flags & PyBUF_C_CONTIGUOUS) == PyBUF_C_CONTIGUOUS) != 0); + if (__pyx_t_2) { + } else { + __pyx_t_1 = __pyx_t_2; + goto __pyx_L6_bool_binop_done; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":217 + * + * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) + * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): # <<<<<<<<<<<<<< + * raise ValueError(u"ndarray is not C contiguous") + * + */ + __pyx_t_2 = ((!(PyArray_CHKFLAGS(__pyx_v_self, NPY_C_CONTIGUOUS) != 0)) != 0); + __pyx_t_1 = __pyx_t_2; + __pyx_L6_bool_binop_done:; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":216 + * copy_shape = 0 + * + * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) # <<<<<<<<<<<<<< + * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): + * raise ValueError(u"ndarray is not C contiguous") + */ + if (__pyx_t_1) { + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":218 + * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) + * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): + * raise ValueError(u"ndarray is not C contiguous") # <<<<<<<<<<<<<< + * + * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) + */ + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__5, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 218, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_Raise(__pyx_t_3, 0, 0, 0); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __PYX_ERR(1, 218, __pyx_L1_error) + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":216 + * copy_shape = 0 + * + * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) # <<<<<<<<<<<<<< + * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): + * raise ValueError(u"ndarray is not C contiguous") + */ + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":220 + * raise ValueError(u"ndarray is not C contiguous") + * + * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) # <<<<<<<<<<<<<< + * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): + * raise ValueError(u"ndarray is not Fortran contiguous") + */ + __pyx_t_2 = (((__pyx_v_flags & PyBUF_F_CONTIGUOUS) == PyBUF_F_CONTIGUOUS) != 0); + if (__pyx_t_2) { + } else { + __pyx_t_1 = __pyx_t_2; + goto __pyx_L9_bool_binop_done; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":221 + * + * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) + * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): # <<<<<<<<<<<<<< + * raise ValueError(u"ndarray is not Fortran contiguous") + * + */ + __pyx_t_2 = ((!(PyArray_CHKFLAGS(__pyx_v_self, NPY_F_CONTIGUOUS) != 0)) != 0); + __pyx_t_1 = __pyx_t_2; + __pyx_L9_bool_binop_done:; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":220 + * raise ValueError(u"ndarray is not C contiguous") + * + * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) # <<<<<<<<<<<<<< + * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): + * raise ValueError(u"ndarray is not Fortran contiguous") + */ + if (__pyx_t_1) { + + /* 
"C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":222 + * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) + * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): + * raise ValueError(u"ndarray is not Fortran contiguous") # <<<<<<<<<<<<<< + * + * info.buf = PyArray_DATA(self) + */ + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__6, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 222, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_Raise(__pyx_t_3, 0, 0, 0); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __PYX_ERR(1, 222, __pyx_L1_error) + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":220 + * raise ValueError(u"ndarray is not C contiguous") + * + * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) # <<<<<<<<<<<<<< + * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): + * raise ValueError(u"ndarray is not Fortran contiguous") + */ + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":224 + * raise ValueError(u"ndarray is not Fortran contiguous") + * + * info.buf = PyArray_DATA(self) # <<<<<<<<<<<<<< + * info.ndim = ndim + * if copy_shape: + */ + __pyx_v_info->buf = PyArray_DATA(__pyx_v_self); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":225 + * + * info.buf = PyArray_DATA(self) + * info.ndim = ndim # <<<<<<<<<<<<<< + * if copy_shape: + * # Allocate new buffer for strides and shape info. + */ + __pyx_v_info->ndim = __pyx_v_ndim; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":226 + * info.buf = PyArray_DATA(self) + * info.ndim = ndim + * if copy_shape: # <<<<<<<<<<<<<< + * # Allocate new buffer for strides and shape info. + * # This is allocated as one block, strides first. + */ + __pyx_t_1 = (__pyx_v_copy_shape != 0); + if (__pyx_t_1) { + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":229 + * # Allocate new buffer for strides and shape info. + * # This is allocated as one block, strides first. + * info.strides = stdlib.malloc(sizeof(Py_ssize_t) * ndim * 2) # <<<<<<<<<<<<<< + * info.shape = info.strides + ndim + * for i in range(ndim): + */ + __pyx_v_info->strides = ((Py_ssize_t *)malloc((((sizeof(Py_ssize_t)) * ((size_t)__pyx_v_ndim)) * 2))); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":230 + * # This is allocated as one block, strides first. 
+ * info.strides = stdlib.malloc(sizeof(Py_ssize_t) * ndim * 2) + * info.shape = info.strides + ndim # <<<<<<<<<<<<<< + * for i in range(ndim): + * info.strides[i] = PyArray_STRIDES(self)[i] + */ + __pyx_v_info->shape = (__pyx_v_info->strides + __pyx_v_ndim); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":231 + * info.strides = stdlib.malloc(sizeof(Py_ssize_t) * ndim * 2) + * info.shape = info.strides + ndim + * for i in range(ndim): # <<<<<<<<<<<<<< + * info.strides[i] = PyArray_STRIDES(self)[i] + * info.shape[i] = PyArray_DIMS(self)[i] + */ + __pyx_t_4 = __pyx_v_ndim; + for (__pyx_t_5 = 0; __pyx_t_5 < __pyx_t_4; __pyx_t_5+=1) { + __pyx_v_i = __pyx_t_5; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":232 + * info.shape = info.strides + ndim + * for i in range(ndim): + * info.strides[i] = PyArray_STRIDES(self)[i] # <<<<<<<<<<<<<< + * info.shape[i] = PyArray_DIMS(self)[i] + * else: + */ + (__pyx_v_info->strides[__pyx_v_i]) = (PyArray_STRIDES(__pyx_v_self)[__pyx_v_i]); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":233 + * for i in range(ndim): + * info.strides[i] = PyArray_STRIDES(self)[i] + * info.shape[i] = PyArray_DIMS(self)[i] # <<<<<<<<<<<<<< + * else: + * info.strides = PyArray_STRIDES(self) + */ + (__pyx_v_info->shape[__pyx_v_i]) = (PyArray_DIMS(__pyx_v_self)[__pyx_v_i]); + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":226 + * info.buf = PyArray_DATA(self) + * info.ndim = ndim + * if copy_shape: # <<<<<<<<<<<<<< + * # Allocate new buffer for strides and shape info. + * # This is allocated as one block, strides first. + */ + goto __pyx_L11; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":235 + * info.shape[i] = PyArray_DIMS(self)[i] + * else: + * info.strides = PyArray_STRIDES(self) # <<<<<<<<<<<<<< + * info.shape = PyArray_DIMS(self) + * info.suboffsets = NULL + */ + /*else*/ { + __pyx_v_info->strides = ((Py_ssize_t *)PyArray_STRIDES(__pyx_v_self)); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":236 + * else: + * info.strides = PyArray_STRIDES(self) + * info.shape = PyArray_DIMS(self) # <<<<<<<<<<<<<< + * info.suboffsets = NULL + * info.itemsize = PyArray_ITEMSIZE(self) + */ + __pyx_v_info->shape = ((Py_ssize_t *)PyArray_DIMS(__pyx_v_self)); + } + __pyx_L11:; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":237 + * info.strides = PyArray_STRIDES(self) + * info.shape = PyArray_DIMS(self) + * info.suboffsets = NULL # <<<<<<<<<<<<<< + * info.itemsize = PyArray_ITEMSIZE(self) + * info.readonly = not PyArray_ISWRITEABLE(self) + */ + __pyx_v_info->suboffsets = NULL; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":238 + * info.shape = PyArray_DIMS(self) + * info.suboffsets = NULL + * info.itemsize = PyArray_ITEMSIZE(self) # <<<<<<<<<<<<<< + * info.readonly = not PyArray_ISWRITEABLE(self) + * + */ + __pyx_v_info->itemsize = PyArray_ITEMSIZE(__pyx_v_self); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":239 + * info.suboffsets = NULL + * info.itemsize = PyArray_ITEMSIZE(self) + * info.readonly = not PyArray_ISWRITEABLE(self) # <<<<<<<<<<<<<< + * + * cdef int t + */ + __pyx_v_info->readonly = (!(PyArray_ISWRITEABLE(__pyx_v_self) != 0)); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":242 + * + * cdef int t + * cdef char* f = NULL # <<<<<<<<<<<<<< + * cdef dtype descr = self.descr + * cdef int offset + */ + __pyx_v_f = 
NULL; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":243 + * cdef int t + * cdef char* f = NULL + * cdef dtype descr = self.descr # <<<<<<<<<<<<<< + * cdef int offset + * + */ + __pyx_t_3 = ((PyObject *)__pyx_v_self->descr); + __Pyx_INCREF(__pyx_t_3); + __pyx_v_descr = ((PyArray_Descr *)__pyx_t_3); + __pyx_t_3 = 0; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":246 + * cdef int offset + * + * cdef bint hasfields = PyDataType_HASFIELDS(descr) # <<<<<<<<<<<<<< + * + * if not hasfields and not copy_shape: + */ + __pyx_v_hasfields = PyDataType_HASFIELDS(__pyx_v_descr); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":248 + * cdef bint hasfields = PyDataType_HASFIELDS(descr) + * + * if not hasfields and not copy_shape: # <<<<<<<<<<<<<< + * # do not call releasebuffer + * info.obj = None + */ + __pyx_t_2 = ((!(__pyx_v_hasfields != 0)) != 0); + if (__pyx_t_2) { + } else { + __pyx_t_1 = __pyx_t_2; + goto __pyx_L15_bool_binop_done; + } + __pyx_t_2 = ((!(__pyx_v_copy_shape != 0)) != 0); + __pyx_t_1 = __pyx_t_2; + __pyx_L15_bool_binop_done:; + if (__pyx_t_1) { + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":250 + * if not hasfields and not copy_shape: + * # do not call releasebuffer + * info.obj = None # <<<<<<<<<<<<<< + * else: + * # need to call releasebuffer + */ + __Pyx_INCREF(Py_None); + __Pyx_GIVEREF(Py_None); + __Pyx_GOTREF(__pyx_v_info->obj); + __Pyx_DECREF(__pyx_v_info->obj); + __pyx_v_info->obj = Py_None; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":248 + * cdef bint hasfields = PyDataType_HASFIELDS(descr) + * + * if not hasfields and not copy_shape: # <<<<<<<<<<<<<< + * # do not call releasebuffer + * info.obj = None + */ + goto __pyx_L14; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":253 + * else: + * # need to call releasebuffer + * info.obj = self # <<<<<<<<<<<<<< + * + * if not hasfields: + */ + /*else*/ { + __Pyx_INCREF(((PyObject *)__pyx_v_self)); + __Pyx_GIVEREF(((PyObject *)__pyx_v_self)); + __Pyx_GOTREF(__pyx_v_info->obj); + __Pyx_DECREF(__pyx_v_info->obj); + __pyx_v_info->obj = ((PyObject *)__pyx_v_self); + } + __pyx_L14:; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":255 + * info.obj = self + * + * if not hasfields: # <<<<<<<<<<<<<< + * t = descr.type_num + * if ((descr.byteorder == c'>' and little_endian) or + */ + __pyx_t_1 = ((!(__pyx_v_hasfields != 0)) != 0); + if (__pyx_t_1) { + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":256 + * + * if not hasfields: + * t = descr.type_num # <<<<<<<<<<<<<< + * if ((descr.byteorder == c'>' and little_endian) or + * (descr.byteorder == c'<' and not little_endian)): + */ + __pyx_t_4 = __pyx_v_descr->type_num; + __pyx_v_t = __pyx_t_4; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":257 + * if not hasfields: + * t = descr.type_num + * if ((descr.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< + * (descr.byteorder == c'<' and not little_endian)): + * raise ValueError(u"Non-native byte order not supported") + */ + __pyx_t_2 = ((__pyx_v_descr->byteorder == '>') != 0); + if (!__pyx_t_2) { + goto __pyx_L20_next_or; + } else { + } + __pyx_t_2 = (__pyx_v_little_endian != 0); + if (!__pyx_t_2) { + } else { + __pyx_t_1 = __pyx_t_2; + goto __pyx_L19_bool_binop_done; + } + __pyx_L20_next_or:; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":258 + * t = descr.type_num + 
* if ((descr.byteorder == c'>' and little_endian) or + * (descr.byteorder == c'<' and not little_endian)): # <<<<<<<<<<<<<< + * raise ValueError(u"Non-native byte order not supported") + * if t == NPY_BYTE: f = "b" + */ + __pyx_t_2 = ((__pyx_v_descr->byteorder == '<') != 0); + if (__pyx_t_2) { + } else { + __pyx_t_1 = __pyx_t_2; + goto __pyx_L19_bool_binop_done; + } + __pyx_t_2 = ((!(__pyx_v_little_endian != 0)) != 0); + __pyx_t_1 = __pyx_t_2; + __pyx_L19_bool_binop_done:; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":257 + * if not hasfields: + * t = descr.type_num + * if ((descr.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< + * (descr.byteorder == c'<' and not little_endian)): + * raise ValueError(u"Non-native byte order not supported") + */ + if (__pyx_t_1) { + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":259 + * if ((descr.byteorder == c'>' and little_endian) or + * (descr.byteorder == c'<' and not little_endian)): + * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< + * if t == NPY_BYTE: f = "b" + * elif t == NPY_UBYTE: f = "B" + */ + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__7, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 259, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_Raise(__pyx_t_3, 0, 0, 0); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __PYX_ERR(1, 259, __pyx_L1_error) + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":257 + * if not hasfields: + * t = descr.type_num + * if ((descr.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< + * (descr.byteorder == c'<' and not little_endian)): + * raise ValueError(u"Non-native byte order not supported") + */ + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":260 + * (descr.byteorder == c'<' and not little_endian)): + * raise ValueError(u"Non-native byte order not supported") + * if t == NPY_BYTE: f = "b" # <<<<<<<<<<<<<< + * elif t == NPY_UBYTE: f = "B" + * elif t == NPY_SHORT: f = "h" + */ + switch (__pyx_v_t) { + case NPY_BYTE: + __pyx_v_f = ((char *)"b"); + break; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":261 + * raise ValueError(u"Non-native byte order not supported") + * if t == NPY_BYTE: f = "b" + * elif t == NPY_UBYTE: f = "B" # <<<<<<<<<<<<<< + * elif t == NPY_SHORT: f = "h" + * elif t == NPY_USHORT: f = "H" + */ + case NPY_UBYTE: + __pyx_v_f = ((char *)"B"); + break; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":262 + * if t == NPY_BYTE: f = "b" + * elif t == NPY_UBYTE: f = "B" + * elif t == NPY_SHORT: f = "h" # <<<<<<<<<<<<<< + * elif t == NPY_USHORT: f = "H" + * elif t == NPY_INT: f = "i" + */ + case NPY_SHORT: + __pyx_v_f = ((char *)"h"); + break; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":263 + * elif t == NPY_UBYTE: f = "B" + * elif t == NPY_SHORT: f = "h" + * elif t == NPY_USHORT: f = "H" # <<<<<<<<<<<<<< + * elif t == NPY_INT: f = "i" + * elif t == NPY_UINT: f = "I" + */ + case NPY_USHORT: + __pyx_v_f = ((char *)"H"); + break; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":264 + * elif t == NPY_SHORT: f = "h" + * elif t == NPY_USHORT: f = "H" + * elif t == NPY_INT: f = "i" # <<<<<<<<<<<<<< + * elif t == NPY_UINT: f = "I" + * elif t == NPY_LONG: f = "l" + */ + case NPY_INT: + __pyx_v_f = ((char *)"i"); + break; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":265 + * elif t == NPY_USHORT: f = 
"H" + * elif t == NPY_INT: f = "i" + * elif t == NPY_UINT: f = "I" # <<<<<<<<<<<<<< + * elif t == NPY_LONG: f = "l" + * elif t == NPY_ULONG: f = "L" + */ + case NPY_UINT: + __pyx_v_f = ((char *)"I"); + break; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":266 + * elif t == NPY_INT: f = "i" + * elif t == NPY_UINT: f = "I" + * elif t == NPY_LONG: f = "l" # <<<<<<<<<<<<<< + * elif t == NPY_ULONG: f = "L" + * elif t == NPY_LONGLONG: f = "q" + */ + case NPY_LONG: + __pyx_v_f = ((char *)"l"); + break; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":267 + * elif t == NPY_UINT: f = "I" + * elif t == NPY_LONG: f = "l" + * elif t == NPY_ULONG: f = "L" # <<<<<<<<<<<<<< + * elif t == NPY_LONGLONG: f = "q" + * elif t == NPY_ULONGLONG: f = "Q" + */ + case NPY_ULONG: + __pyx_v_f = ((char *)"L"); + break; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":268 + * elif t == NPY_LONG: f = "l" + * elif t == NPY_ULONG: f = "L" + * elif t == NPY_LONGLONG: f = "q" # <<<<<<<<<<<<<< + * elif t == NPY_ULONGLONG: f = "Q" + * elif t == NPY_FLOAT: f = "f" + */ + case NPY_LONGLONG: + __pyx_v_f = ((char *)"q"); + break; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":269 + * elif t == NPY_ULONG: f = "L" + * elif t == NPY_LONGLONG: f = "q" + * elif t == NPY_ULONGLONG: f = "Q" # <<<<<<<<<<<<<< + * elif t == NPY_FLOAT: f = "f" + * elif t == NPY_DOUBLE: f = "d" + */ + case NPY_ULONGLONG: + __pyx_v_f = ((char *)"Q"); + break; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":270 + * elif t == NPY_LONGLONG: f = "q" + * elif t == NPY_ULONGLONG: f = "Q" + * elif t == NPY_FLOAT: f = "f" # <<<<<<<<<<<<<< + * elif t == NPY_DOUBLE: f = "d" + * elif t == NPY_LONGDOUBLE: f = "g" + */ + case NPY_FLOAT: + __pyx_v_f = ((char *)"f"); + break; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":271 + * elif t == NPY_ULONGLONG: f = "Q" + * elif t == NPY_FLOAT: f = "f" + * elif t == NPY_DOUBLE: f = "d" # <<<<<<<<<<<<<< + * elif t == NPY_LONGDOUBLE: f = "g" + * elif t == NPY_CFLOAT: f = "Zf" + */ + case NPY_DOUBLE: + __pyx_v_f = ((char *)"d"); + break; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":272 + * elif t == NPY_FLOAT: f = "f" + * elif t == NPY_DOUBLE: f = "d" + * elif t == NPY_LONGDOUBLE: f = "g" # <<<<<<<<<<<<<< + * elif t == NPY_CFLOAT: f = "Zf" + * elif t == NPY_CDOUBLE: f = "Zd" + */ + case NPY_LONGDOUBLE: + __pyx_v_f = ((char *)"g"); + break; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":273 + * elif t == NPY_DOUBLE: f = "d" + * elif t == NPY_LONGDOUBLE: f = "g" + * elif t == NPY_CFLOAT: f = "Zf" # <<<<<<<<<<<<<< + * elif t == NPY_CDOUBLE: f = "Zd" + * elif t == NPY_CLONGDOUBLE: f = "Zg" + */ + case NPY_CFLOAT: + __pyx_v_f = ((char *)"Zf"); + break; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":274 + * elif t == NPY_LONGDOUBLE: f = "g" + * elif t == NPY_CFLOAT: f = "Zf" + * elif t == NPY_CDOUBLE: f = "Zd" # <<<<<<<<<<<<<< + * elif t == NPY_CLONGDOUBLE: f = "Zg" + * elif t == NPY_OBJECT: f = "O" + */ + case NPY_CDOUBLE: + __pyx_v_f = ((char *)"Zd"); + break; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":275 + * elif t == NPY_CFLOAT: f = "Zf" + * elif t == NPY_CDOUBLE: f = "Zd" + * elif t == NPY_CLONGDOUBLE: f = "Zg" # <<<<<<<<<<<<<< + * elif t == NPY_OBJECT: f = "O" + * else: + */ + case NPY_CLONGDOUBLE: + __pyx_v_f = ((char *)"Zg"); + break; + + /* 
"C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":276 + * elif t == NPY_CDOUBLE: f = "Zd" + * elif t == NPY_CLONGDOUBLE: f = "Zg" + * elif t == NPY_OBJECT: f = "O" # <<<<<<<<<<<<<< + * else: + * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) + */ + case NPY_OBJECT: + __pyx_v_f = ((char *)"O"); + break; + default: + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":278 + * elif t == NPY_OBJECT: f = "O" + * else: + * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) # <<<<<<<<<<<<<< + * info.format = f + * return + */ + __pyx_t_3 = __Pyx_PyInt_From_int(__pyx_v_t); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 278, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_6 = PyUnicode_Format(__pyx_kp_u_unknown_dtype_code_in_numpy_pxd, __pyx_t_3); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 278, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 278, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_GIVEREF(__pyx_t_6); + PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_6); + __pyx_t_6 = 0; + __pyx_t_6 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_t_3, NULL); if (unlikely(!__pyx_t_6)) __PYX_ERR(1, 278, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_6); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __Pyx_Raise(__pyx_t_6, 0, 0, 0); + __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0; + __PYX_ERR(1, 278, __pyx_L1_error) + break; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":279 + * else: + * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) + * info.format = f # <<<<<<<<<<<<<< + * return + * else: + */ + __pyx_v_info->format = __pyx_v_f; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":280 + * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) + * info.format = f + * return # <<<<<<<<<<<<<< + * else: + * info.format = stdlib.malloc(_buffer_format_string_len) + */ + __pyx_r = 0; + goto __pyx_L0; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":255 + * info.obj = self + * + * if not hasfields: # <<<<<<<<<<<<<< + * t = descr.type_num + * if ((descr.byteorder == c'>' and little_endian) or + */ + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":282 + * return + * else: + * info.format = stdlib.malloc(_buffer_format_string_len) # <<<<<<<<<<<<<< + * info.format[0] = c'^' # Native data types, manual alignment + * offset = 0 + */ + /*else*/ { + __pyx_v_info->format = ((char *)malloc(0xFF)); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":283 + * else: + * info.format = stdlib.malloc(_buffer_format_string_len) + * info.format[0] = c'^' # Native data types, manual alignment # <<<<<<<<<<<<<< + * offset = 0 + * f = _util_dtypestring(descr, info.format + 1, + */ + (__pyx_v_info->format[0]) = '^'; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":284 + * info.format = stdlib.malloc(_buffer_format_string_len) + * info.format[0] = c'^' # Native data types, manual alignment + * offset = 0 # <<<<<<<<<<<<<< + * f = _util_dtypestring(descr, info.format + 1, + * info.format + _buffer_format_string_len, + */ + __pyx_v_offset = 0; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":285 + * info.format[0] = c'^' # Native data types, manual alignment + * offset = 0 + * f = _util_dtypestring(descr, info.format + 1, # <<<<<<<<<<<<<< + * info.format + _buffer_format_string_len, + * 
&offset) + */ + __pyx_t_7 = __pyx_f_5numpy__util_dtypestring(__pyx_v_descr, (__pyx_v_info->format + 1), (__pyx_v_info->format + 0xFF), (&__pyx_v_offset)); if (unlikely(__pyx_t_7 == NULL)) __PYX_ERR(1, 285, __pyx_L1_error) + __pyx_v_f = __pyx_t_7; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":288 + * info.format + _buffer_format_string_len, + * &offset) + * f[0] = c'\0' # Terminate format string # <<<<<<<<<<<<<< + * + * def __releasebuffer__(ndarray self, Py_buffer* info): + */ + (__pyx_v_f[0]) = '\x00'; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":197 + * # experimental exception made for __getbuffer__ and __releasebuffer__ + * # -- the details of this may change. + * def __getbuffer__(ndarray self, Py_buffer* info, int flags): # <<<<<<<<<<<<<< + * # This implementation of getbuffer is geared towards Cython + * # requirements, and does not yet fullfill the PEP. + */ + + /* function exit code */ + __pyx_r = 0; + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_6); + __Pyx_AddTraceback("numpy.ndarray.__getbuffer__", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = -1; + if (__pyx_v_info != NULL && __pyx_v_info->obj != NULL) { + __Pyx_GOTREF(__pyx_v_info->obj); + __Pyx_DECREF(__pyx_v_info->obj); __pyx_v_info->obj = NULL; + } + goto __pyx_L2; + __pyx_L0:; + if (__pyx_v_info != NULL && __pyx_v_info->obj == Py_None) { + __Pyx_GOTREF(Py_None); + __Pyx_DECREF(Py_None); __pyx_v_info->obj = NULL; + } + __pyx_L2:; + __Pyx_XDECREF((PyObject *)__pyx_v_descr); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":290 + * f[0] = c'\0' # Terminate format string + * + * def __releasebuffer__(ndarray self, Py_buffer* info): # <<<<<<<<<<<<<< + * if PyArray_HASFIELDS(self): + * stdlib.free(info.format) + */ + +/* Python wrapper */ +static CYTHON_UNUSED void __pyx_pw_5numpy_7ndarray_3__releasebuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info); /*proto*/ +static CYTHON_UNUSED void __pyx_pw_5numpy_7ndarray_3__releasebuffer__(PyObject *__pyx_v_self, Py_buffer *__pyx_v_info) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__releasebuffer__ (wrapper)", 0); + __pyx_pf_5numpy_7ndarray_2__releasebuffer__(((PyArrayObject *)__pyx_v_self), ((Py_buffer *)__pyx_v_info)); + + /* function exit code */ + __Pyx_RefNannyFinishContext(); +} + +static void __pyx_pf_5numpy_7ndarray_2__releasebuffer__(PyArrayObject *__pyx_v_self, Py_buffer *__pyx_v_info) { + __Pyx_RefNannyDeclarations + int __pyx_t_1; + __Pyx_RefNannySetupContext("__releasebuffer__", 0); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":291 + * + * def __releasebuffer__(ndarray self, Py_buffer* info): + * if PyArray_HASFIELDS(self): # <<<<<<<<<<<<<< + * stdlib.free(info.format) + * if sizeof(npy_intp) != sizeof(Py_ssize_t): + */ + __pyx_t_1 = (PyArray_HASFIELDS(__pyx_v_self) != 0); + if (__pyx_t_1) { + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":292 + * def __releasebuffer__(ndarray self, Py_buffer* info): + * if PyArray_HASFIELDS(self): + * stdlib.free(info.format) # <<<<<<<<<<<<<< + * if sizeof(npy_intp) != sizeof(Py_ssize_t): + * stdlib.free(info.strides) + */ + free(__pyx_v_info->format); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":291 + * + * def __releasebuffer__(ndarray self, Py_buffer* info): + * if PyArray_HASFIELDS(self): # <<<<<<<<<<<<<< + * stdlib.free(info.format) + * 
if sizeof(npy_intp) != sizeof(Py_ssize_t): + */ + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":293 + * if PyArray_HASFIELDS(self): + * stdlib.free(info.format) + * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<< + * stdlib.free(info.strides) + * # info.shape was stored after info.strides in the same block + */ + __pyx_t_1 = (((sizeof(npy_intp)) != (sizeof(Py_ssize_t))) != 0); + if (__pyx_t_1) { + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":294 + * stdlib.free(info.format) + * if sizeof(npy_intp) != sizeof(Py_ssize_t): + * stdlib.free(info.strides) # <<<<<<<<<<<<<< + * # info.shape was stored after info.strides in the same block + * + */ + free(__pyx_v_info->strides); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":293 + * if PyArray_HASFIELDS(self): + * stdlib.free(info.format) + * if sizeof(npy_intp) != sizeof(Py_ssize_t): # <<<<<<<<<<<<<< + * stdlib.free(info.strides) + * # info.shape was stored after info.strides in the same block + */ + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":290 + * f[0] = c'\0' # Terminate format string + * + * def __releasebuffer__(ndarray self, Py_buffer* info): # <<<<<<<<<<<<<< + * if PyArray_HASFIELDS(self): + * stdlib.free(info.format) + */ + + /* function exit code */ + __Pyx_RefNannyFinishContext(); +} + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":770 + * ctypedef npy_cdouble complex_t + * + * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(1, a) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew1(PyObject *__pyx_v_a) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew1", 0); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":771 + * + * cdef inline object PyArray_MultiIterNew1(a): + * return PyArray_MultiIterNew(1, a) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew2(a, b): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(1, ((void *)__pyx_v_a)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 771, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":770 + * ctypedef npy_cdouble complex_t + * + * cdef inline object PyArray_MultiIterNew1(a): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(1, a) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew1", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":773 + * return PyArray_MultiIterNew(1, a) + * + * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(2, a, b) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew2(PyObject *__pyx_v_a, PyObject *__pyx_v_b) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew2", 0); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":774 + * + * cdef inline object PyArray_MultiIterNew2(a, b): + * return PyArray_MultiIterNew(2, a, b) # <<<<<<<<<<<<<< + * + * cdef 
inline object PyArray_MultiIterNew3(a, b, c): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(2, ((void *)__pyx_v_a), ((void *)__pyx_v_b)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 774, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":773 + * return PyArray_MultiIterNew(1, a) + * + * cdef inline object PyArray_MultiIterNew2(a, b): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(2, a, b) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew2", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":776 + * return PyArray_MultiIterNew(2, a, b) + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(3, a, b, c) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew3(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew3", 0); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":777 + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): + * return PyArray_MultiIterNew(3, a, b, c) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(3, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 777, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":776 + * return PyArray_MultiIterNew(2, a, b) + * + * cdef inline object PyArray_MultiIterNew3(a, b, c): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(3, a, b, c) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew3", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":779 + * return PyArray_MultiIterNew(3, a, b, c) + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(4, a, b, c, d) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew4(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew4", 0); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":780 + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): + * return PyArray_MultiIterNew(4, a, b, c, d) # <<<<<<<<<<<<<< + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(4, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 780, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + 
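+ /* PyArray_MultiIterNew1..5 are inline helpers generated from numpy/__init__.pxd: each simply forwards its one to five array arguments to PyArray_MultiIterNew(n, ...), NumPy's broadcast multi-iterator constructor, with the usual traceback-and-NULL exit code appended. */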
/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":779 + * return PyArray_MultiIterNew(3, a, b, c) + * + * cdef inline object PyArray_MultiIterNew4(a, b, c, d): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(4, a, b, c, d) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew4", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":782 + * return PyArray_MultiIterNew(4, a, b, c, d) + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_PyArray_MultiIterNew5(PyObject *__pyx_v_a, PyObject *__pyx_v_b, PyObject *__pyx_v_c, PyObject *__pyx_v_d, PyObject *__pyx_v_e) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + __Pyx_RefNannySetupContext("PyArray_MultiIterNew5", 0); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":783 + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): + * return PyArray_MultiIterNew(5, a, b, c, d, e) # <<<<<<<<<<<<<< + * + * cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL: + */ + __Pyx_XDECREF(__pyx_r); + __pyx_t_1 = PyArray_MultiIterNew(5, ((void *)__pyx_v_a), ((void *)__pyx_v_b), ((void *)__pyx_v_c), ((void *)__pyx_v_d), ((void *)__pyx_v_e)); if (unlikely(!__pyx_t_1)) __PYX_ERR(1, 783, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + __pyx_r = __pyx_t_1; + __pyx_t_1 = 0; + goto __pyx_L0; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":782 + * return PyArray_MultiIterNew(4, a, b, c, d) + * + * cdef inline object PyArray_MultiIterNew5(a, b, c, d, e): # <<<<<<<<<<<<<< + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_AddTraceback("numpy.PyArray_MultiIterNew5", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = 0; + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":785 + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + * cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL: # <<<<<<<<<<<<<< + * # Recursive utility function used in __getbuffer__ to get format + * # string. The new location in the format string is returned. 
+ */ + +static CYTHON_INLINE char *__pyx_f_5numpy__util_dtypestring(PyArray_Descr *__pyx_v_descr, char *__pyx_v_f, char *__pyx_v_end, int *__pyx_v_offset) { + PyArray_Descr *__pyx_v_child = 0; + int __pyx_v_endian_detector; + int __pyx_v_little_endian; + PyObject *__pyx_v_fields = 0; + PyObject *__pyx_v_childname = NULL; + PyObject *__pyx_v_new_offset = NULL; + PyObject *__pyx_v_t = NULL; + char *__pyx_r; + __Pyx_RefNannyDeclarations + PyObject *__pyx_t_1 = NULL; + Py_ssize_t __pyx_t_2; + PyObject *__pyx_t_3 = NULL; + PyObject *__pyx_t_4 = NULL; + int __pyx_t_5; + int __pyx_t_6; + int __pyx_t_7; + long __pyx_t_8; + char *__pyx_t_9; + __Pyx_RefNannySetupContext("_util_dtypestring", 0); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":790 + * + * cdef dtype child + * cdef int endian_detector = 1 # <<<<<<<<<<<<<< + * cdef bint little_endian = ((&endian_detector)[0] != 0) + * cdef tuple fields + */ + __pyx_v_endian_detector = 1; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":791 + * cdef dtype child + * cdef int endian_detector = 1 + * cdef bint little_endian = ((&endian_detector)[0] != 0) # <<<<<<<<<<<<<< + * cdef tuple fields + * + */ + __pyx_v_little_endian = ((((char *)(&__pyx_v_endian_detector))[0]) != 0); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":794 + * cdef tuple fields + * + * for childname in descr.names: # <<<<<<<<<<<<<< + * fields = descr.fields[childname] + * child, new_offset = fields + */ + if (unlikely(__pyx_v_descr->names == Py_None)) { + PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); + __PYX_ERR(1, 794, __pyx_L1_error) + } + __pyx_t_1 = __pyx_v_descr->names; __Pyx_INCREF(__pyx_t_1); __pyx_t_2 = 0; + for (;;) { + if (__pyx_t_2 >= PyTuple_GET_SIZE(__pyx_t_1)) break; + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_3 = PyTuple_GET_ITEM(__pyx_t_1, __pyx_t_2); __Pyx_INCREF(__pyx_t_3); __pyx_t_2++; if (unlikely(0 < 0)) __PYX_ERR(1, 794, __pyx_L1_error) + #else + __pyx_t_3 = PySequence_ITEM(__pyx_t_1, __pyx_t_2); __pyx_t_2++; if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 794, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + #endif + __Pyx_XDECREF_SET(__pyx_v_childname, __pyx_t_3); + __pyx_t_3 = 0; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":795 + * + * for childname in descr.names: + * fields = descr.fields[childname] # <<<<<<<<<<<<<< + * child, new_offset = fields + * + */ + if (unlikely(__pyx_v_descr->fields == Py_None)) { + PyErr_SetString(PyExc_TypeError, "'NoneType' object is not subscriptable"); + __PYX_ERR(1, 795, __pyx_L1_error) + } + __pyx_t_3 = __Pyx_PyDict_GetItem(__pyx_v_descr->fields, __pyx_v_childname); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 795, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + if (!(likely(PyTuple_CheckExact(__pyx_t_3))||((__pyx_t_3) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "tuple", Py_TYPE(__pyx_t_3)->tp_name), 0))) __PYX_ERR(1, 795, __pyx_L1_error) + __Pyx_XDECREF_SET(__pyx_v_fields, ((PyObject*)__pyx_t_3)); + __pyx_t_3 = 0; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":796 + * for childname in descr.names: + * fields = descr.fields[childname] + * child, new_offset = fields # <<<<<<<<<<<<<< + * + * if (end - f) - (new_offset - offset[0]) < 15: + */ + if (likely(__pyx_v_fields != Py_None)) { + PyObject* sequence = __pyx_v_fields; + #if CYTHON_COMPILING_IN_CPYTHON + Py_ssize_t size = Py_SIZE(sequence); + #else + Py_ssize_t size = PySequence_Size(sequence); + #endif + if 
(unlikely(size != 2)) { + if (size > 2) __Pyx_RaiseTooManyValuesError(2); + else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size); + __PYX_ERR(1, 796, __pyx_L1_error) + } + #if CYTHON_COMPILING_IN_CPYTHON + __pyx_t_3 = PyTuple_GET_ITEM(sequence, 0); + __pyx_t_4 = PyTuple_GET_ITEM(sequence, 1); + __Pyx_INCREF(__pyx_t_3); + __Pyx_INCREF(__pyx_t_4); + #else + __pyx_t_3 = PySequence_ITEM(sequence, 0); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 796, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PySequence_ITEM(sequence, 1); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 796, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + #endif + } else { + __Pyx_RaiseNoneNotIterableError(); __PYX_ERR(1, 796, __pyx_L1_error) + } + if (!(likely(((__pyx_t_3) == Py_None) || likely(__Pyx_TypeTest(__pyx_t_3, __pyx_ptype_5numpy_dtype))))) __PYX_ERR(1, 796, __pyx_L1_error) + __Pyx_XDECREF_SET(__pyx_v_child, ((PyArray_Descr *)__pyx_t_3)); + __pyx_t_3 = 0; + __Pyx_XDECREF_SET(__pyx_v_new_offset, __pyx_t_4); + __pyx_t_4 = 0; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":798 + * child, new_offset = fields + * + * if (end - f) - (new_offset - offset[0]) < 15: # <<<<<<<<<<<<<< + * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") + * + */ + __pyx_t_4 = __Pyx_PyInt_From_int((__pyx_v_offset[0])); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 798, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = PyNumber_Subtract(__pyx_v_new_offset, __pyx_t_4); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 798, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_5 = __Pyx_PyInt_As_int(__pyx_t_3); if (unlikely((__pyx_t_5 == (int)-1) && PyErr_Occurred())) __PYX_ERR(1, 798, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_6 = ((((__pyx_v_end - __pyx_v_f) - ((int)__pyx_t_5)) < 15) != 0); + if (__pyx_t_6) { + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":799 + * + * if (end - f) - (new_offset - offset[0]) < 15: + * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") # <<<<<<<<<<<<<< + * + * if ((child.byteorder == c'>' and little_endian) or + */ + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_RuntimeError, __pyx_tuple__8, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 799, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_Raise(__pyx_t_3, 0, 0, 0); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __PYX_ERR(1, 799, __pyx_L1_error) + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":798 + * child, new_offset = fields + * + * if (end - f) - (new_offset - offset[0]) < 15: # <<<<<<<<<<<<<< + * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") + * + */ + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":801 + * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") + * + * if ((child.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< + * (child.byteorder == c'<' and not little_endian)): + * raise ValueError(u"Non-native byte order not supported") + */ + __pyx_t_7 = ((__pyx_v_child->byteorder == '>') != 0); + if (!__pyx_t_7) { + goto __pyx_L8_next_or; + } else { + } + __pyx_t_7 = (__pyx_v_little_endian != 0); + if (!__pyx_t_7) { + } else { + __pyx_t_6 = __pyx_t_7; + goto __pyx_L7_bool_binop_done; + } + __pyx_L8_next_or:; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":802 + * + * if ((child.byteorder == c'>' and little_endian) or + * 
(child.byteorder == c'<' and not little_endian)): # <<<<<<<<<<<<<< + * raise ValueError(u"Non-native byte order not supported") + * # One could encode it in the format string and have Cython + */ + __pyx_t_7 = ((__pyx_v_child->byteorder == '<') != 0); + if (__pyx_t_7) { + } else { + __pyx_t_6 = __pyx_t_7; + goto __pyx_L7_bool_binop_done; + } + __pyx_t_7 = ((!(__pyx_v_little_endian != 0)) != 0); + __pyx_t_6 = __pyx_t_7; + __pyx_L7_bool_binop_done:; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":801 + * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") + * + * if ((child.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< + * (child.byteorder == c'<' and not little_endian)): + * raise ValueError(u"Non-native byte order not supported") + */ + if (__pyx_t_6) { + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":803 + * if ((child.byteorder == c'>' and little_endian) or + * (child.byteorder == c'<' and not little_endian)): + * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< + * # One could encode it in the format string and have Cython + * # complain instead, BUT: < and > in format strings also imply + */ + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_tuple__9, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 803, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_Raise(__pyx_t_3, 0, 0, 0); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __PYX_ERR(1, 803, __pyx_L1_error) + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":801 + * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") + * + * if ((child.byteorder == c'>' and little_endian) or # <<<<<<<<<<<<<< + * (child.byteorder == c'<' and not little_endian)): + * raise ValueError(u"Non-native byte order not supported") + */ + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":813 + * + * # Output padding bytes + * while offset[0] < new_offset: # <<<<<<<<<<<<<< + * f[0] = 120 # "x"; pad byte + * f += 1 + */ + while (1) { + __pyx_t_3 = __Pyx_PyInt_From_int((__pyx_v_offset[0])); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 813, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PyObject_RichCompare(__pyx_t_3, __pyx_v_new_offset, Py_LT); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 813, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 813, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (!__pyx_t_6) break; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":814 + * # Output padding bytes + * while offset[0] < new_offset: + * f[0] = 120 # "x"; pad byte # <<<<<<<<<<<<<< + * f += 1 + * offset[0] += 1 + */ + (__pyx_v_f[0]) = 0x78; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":815 + * while offset[0] < new_offset: + * f[0] = 120 # "x"; pad byte + * f += 1 # <<<<<<<<<<<<<< + * offset[0] += 1 + * + */ + __pyx_v_f = (__pyx_v_f + 1); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":816 + * f[0] = 120 # "x"; pad byte + * f += 1 + * offset[0] += 1 # <<<<<<<<<<<<<< + * + * offset[0] += child.itemsize + */ + __pyx_t_8 = 0; + (__pyx_v_offset[__pyx_t_8]) = ((__pyx_v_offset[__pyx_t_8]) + 1); + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":818 + * offset[0] += 1 + * + * offset[0] += child.itemsize # <<<<<<<<<<<<<< + * + * if 
not PyDataType_HASFIELDS(child): + */ + __pyx_t_8 = 0; + (__pyx_v_offset[__pyx_t_8]) = ((__pyx_v_offset[__pyx_t_8]) + __pyx_v_child->elsize); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":820 + * offset[0] += child.itemsize + * + * if not PyDataType_HASFIELDS(child): # <<<<<<<<<<<<<< + * t = child.type_num + * if end - f < 5: + */ + __pyx_t_6 = ((!(PyDataType_HASFIELDS(__pyx_v_child) != 0)) != 0); + if (__pyx_t_6) { + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":821 + * + * if not PyDataType_HASFIELDS(child): + * t = child.type_num # <<<<<<<<<<<<<< + * if end - f < 5: + * raise RuntimeError(u"Format string allocated too short.") + */ + __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_child->type_num); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 821, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_XDECREF_SET(__pyx_v_t, __pyx_t_4); + __pyx_t_4 = 0; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":822 + * if not PyDataType_HASFIELDS(child): + * t = child.type_num + * if end - f < 5: # <<<<<<<<<<<<<< + * raise RuntimeError(u"Format string allocated too short.") + * + */ + __pyx_t_6 = (((__pyx_v_end - __pyx_v_f) < 5) != 0); + if (__pyx_t_6) { + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":823 + * t = child.type_num + * if end - f < 5: + * raise RuntimeError(u"Format string allocated too short.") # <<<<<<<<<<<<<< + * + * # Until ticket #99 is fixed, use integers to avoid warnings + */ + __pyx_t_4 = __Pyx_PyObject_Call(__pyx_builtin_RuntimeError, __pyx_tuple__10, NULL); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 823, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __Pyx_Raise(__pyx_t_4, 0, 0, 0); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __PYX_ERR(1, 823, __pyx_L1_error) + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":822 + * if not PyDataType_HASFIELDS(child): + * t = child.type_num + * if end - f < 5: # <<<<<<<<<<<<<< + * raise RuntimeError(u"Format string allocated too short.") + * + */ + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":826 + * + * # Until ticket #99 is fixed, use integers to avoid warnings + * if t == NPY_BYTE: f[0] = 98 #"b" # <<<<<<<<<<<<<< + * elif t == NPY_UBYTE: f[0] = 66 #"B" + * elif t == NPY_SHORT: f[0] = 104 #"h" + */ + __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_BYTE); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 826, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 826, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 826, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 98; + goto __pyx_L15; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":827 + * # Until ticket #99 is fixed, use integers to avoid warnings + * if t == NPY_BYTE: f[0] = 98 #"b" + * elif t == NPY_UBYTE: f[0] = 66 #"B" # <<<<<<<<<<<<<< + * elif t == NPY_SHORT: f[0] = 104 #"h" + * elif t == NPY_USHORT: f[0] = 72 #"H" + */ + __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_UBYTE); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 827, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 827, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + 
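/* The if/elif ladder resumed here maps a leaf dtype's type_num to the single buffer-protocol format letter written into f; the integer stores are the "use integers to avoid warnings" workaround quoted from the pxd source, and each test goes through PyObject_RichCompare because t is a boxed Python int at this point. A condensed illustrative sketch of the same mapping (guarded out with #if 0, not part of the generated module): */ + #if 0 + static char npy_format_char(int type_num) + { + switch (type_num) { + case NPY_BYTE: return 'b'; case NPY_UBYTE: return 'B'; + case NPY_SHORT: return 'h'; case NPY_USHORT: return 'H'; + case NPY_INT: return 'i'; case NPY_UINT: return 'I'; + case NPY_LONG: return 'l'; case NPY_ULONG: return 'L'; + case NPY_LONGLONG: return 'q'; case NPY_ULONGLONG: return 'Q'; + case NPY_FLOAT: return 'f'; case NPY_DOUBLE: return 'd'; + case NPY_LONGDOUBLE: return 'g'; case NPY_OBJECT: return 'O'; + default: return 0; /* caller raises ValueError */ + } + } + #endif +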
__pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 827, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 66; + goto __pyx_L15; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":828 + * if t == NPY_BYTE: f[0] = 98 #"b" + * elif t == NPY_UBYTE: f[0] = 66 #"B" + * elif t == NPY_SHORT: f[0] = 104 #"h" # <<<<<<<<<<<<<< + * elif t == NPY_USHORT: f[0] = 72 #"H" + * elif t == NPY_INT: f[0] = 105 #"i" + */ + __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_SHORT); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 828, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 828, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 828, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 0x68; + goto __pyx_L15; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":829 + * elif t == NPY_UBYTE: f[0] = 66 #"B" + * elif t == NPY_SHORT: f[0] = 104 #"h" + * elif t == NPY_USHORT: f[0] = 72 #"H" # <<<<<<<<<<<<<< + * elif t == NPY_INT: f[0] = 105 #"i" + * elif t == NPY_UINT: f[0] = 73 #"I" + */ + __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_USHORT); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 829, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 829, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 829, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 72; + goto __pyx_L15; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":830 + * elif t == NPY_SHORT: f[0] = 104 #"h" + * elif t == NPY_USHORT: f[0] = 72 #"H" + * elif t == NPY_INT: f[0] = 105 #"i" # <<<<<<<<<<<<<< + * elif t == NPY_UINT: f[0] = 73 #"I" + * elif t == NPY_LONG: f[0] = 108 #"l" + */ + __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_INT); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 830, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 830, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 830, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 0x69; + goto __pyx_L15; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":831 + * elif t == NPY_USHORT: f[0] = 72 #"H" + * elif t == NPY_INT: f[0] = 105 #"i" + * elif t == NPY_UINT: f[0] = 73 #"I" # <<<<<<<<<<<<<< + * elif t == NPY_LONG: f[0] = 108 #"l" + * elif t == NPY_ULONG: f[0] = 76 #"L" + */ + __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_UINT); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 831, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 831, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 831, __pyx_L1_error) + 
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 73; + goto __pyx_L15; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":832 + * elif t == NPY_INT: f[0] = 105 #"i" + * elif t == NPY_UINT: f[0] = 73 #"I" + * elif t == NPY_LONG: f[0] = 108 #"l" # <<<<<<<<<<<<<< + * elif t == NPY_ULONG: f[0] = 76 #"L" + * elif t == NPY_LONGLONG: f[0] = 113 #"q" + */ + __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_LONG); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 832, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 832, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 832, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 0x6C; + goto __pyx_L15; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":833 + * elif t == NPY_UINT: f[0] = 73 #"I" + * elif t == NPY_LONG: f[0] = 108 #"l" + * elif t == NPY_ULONG: f[0] = 76 #"L" # <<<<<<<<<<<<<< + * elif t == NPY_LONGLONG: f[0] = 113 #"q" + * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" + */ + __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_ULONG); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 833, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 833, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 833, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 76; + goto __pyx_L15; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":834 + * elif t == NPY_LONG: f[0] = 108 #"l" + * elif t == NPY_ULONG: f[0] = 76 #"L" + * elif t == NPY_LONGLONG: f[0] = 113 #"q" # <<<<<<<<<<<<<< + * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" + * elif t == NPY_FLOAT: f[0] = 102 #"f" + */ + __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_LONGLONG); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 834, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 834, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 834, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 0x71; + goto __pyx_L15; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":835 + * elif t == NPY_ULONG: f[0] = 76 #"L" + * elif t == NPY_LONGLONG: f[0] = 113 #"q" + * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" # <<<<<<<<<<<<<< + * elif t == NPY_FLOAT: f[0] = 102 #"f" + * elif t == NPY_DOUBLE: f[0] = 100 #"d" + */ + __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_ULONGLONG); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 835, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 835, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 835, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 81; + 
goto __pyx_L15; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":836 + * elif t == NPY_LONGLONG: f[0] = 113 #"q" + * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" + * elif t == NPY_FLOAT: f[0] = 102 #"f" # <<<<<<<<<<<<<< + * elif t == NPY_DOUBLE: f[0] = 100 #"d" + * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" + */ + __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_FLOAT); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 836, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 836, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 836, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 0x66; + goto __pyx_L15; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":837 + * elif t == NPY_ULONGLONG: f[0] = 81 #"Q" + * elif t == NPY_FLOAT: f[0] = 102 #"f" + * elif t == NPY_DOUBLE: f[0] = 100 #"d" # <<<<<<<<<<<<<< + * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" + * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf + */ + __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_DOUBLE); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 837, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 837, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 837, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 0x64; + goto __pyx_L15; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":838 + * elif t == NPY_FLOAT: f[0] = 102 #"f" + * elif t == NPY_DOUBLE: f[0] = 100 #"d" + * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" # <<<<<<<<<<<<<< + * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf + * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd + */ + __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_LONGDOUBLE); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 838, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 838, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 838, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 0x67; + goto __pyx_L15; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":839 + * elif t == NPY_DOUBLE: f[0] = 100 #"d" + * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" + * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf # <<<<<<<<<<<<<< + * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd + * elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg + */ + __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_CFLOAT); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 839, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 839, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 839, __pyx_L1_error) + 
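/* In the complex branches below the format code is two characters wide: 90 ('Z') goes to f[0] and the base letter (102 'f', 100 'd', 103 'g') to f[1], with f advanced one extra byte inside the branch so that the shared "f += 1" after the ladder lands past both characters, producing "Zf", "Zd" or "Zg". */ +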
__Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 90; + (__pyx_v_f[1]) = 0x66; + __pyx_v_f = (__pyx_v_f + 1); + goto __pyx_L15; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":840 + * elif t == NPY_LONGDOUBLE: f[0] = 103 #"g" + * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf + * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd # <<<<<<<<<<<<<< + * elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg + * elif t == NPY_OBJECT: f[0] = 79 #"O" + */ + __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_CDOUBLE); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 840, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 840, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 840, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 90; + (__pyx_v_f[1]) = 0x64; + __pyx_v_f = (__pyx_v_f + 1); + goto __pyx_L15; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":841 + * elif t == NPY_CFLOAT: f[0] = 90; f[1] = 102; f += 1 # Zf + * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd + * elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg # <<<<<<<<<<<<<< + * elif t == NPY_OBJECT: f[0] = 79 #"O" + * else: + */ + __pyx_t_3 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_CLONGDOUBLE); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 841, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PyObject_RichCompare(__pyx_v_t, __pyx_t_3, Py_EQ); __Pyx_XGOTREF(__pyx_t_4); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 841, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_4); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 841, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 90; + (__pyx_v_f[1]) = 0x67; + __pyx_v_f = (__pyx_v_f + 1); + goto __pyx_L15; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":842 + * elif t == NPY_CDOUBLE: f[0] = 90; f[1] = 100; f += 1 # Zd + * elif t == NPY_CLONGDOUBLE: f[0] = 90; f[1] = 103; f += 1 # Zg + * elif t == NPY_OBJECT: f[0] = 79 #"O" # <<<<<<<<<<<<<< + * else: + * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) + */ + __pyx_t_4 = __Pyx_PyInt_From_enum__NPY_TYPES(NPY_OBJECT); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 842, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_4); + __pyx_t_3 = PyObject_RichCompare(__pyx_v_t, __pyx_t_4, Py_EQ); __Pyx_XGOTREF(__pyx_t_3); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 842, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __pyx_t_6 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_6 < 0)) __PYX_ERR(1, 842, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + if (__pyx_t_6) { + (__pyx_v_f[0]) = 79; + goto __pyx_L15; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":844 + * elif t == NPY_OBJECT: f[0] = 79 #"O" + * else: + * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) # <<<<<<<<<<<<<< + * f += 1 + * else: + */ + /*else*/ { + __pyx_t_3 = PyUnicode_Format(__pyx_kp_u_unknown_dtype_code_in_numpy_pxd, __pyx_v_t); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 844, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) __PYX_ERR(1, 844, __pyx_L1_error) + 
__Pyx_GOTREF(__pyx_t_4); + __Pyx_GIVEREF(__pyx_t_3); + PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_3); + __pyx_t_3 = 0; + __pyx_t_3 = __Pyx_PyObject_Call(__pyx_builtin_ValueError, __pyx_t_4, NULL); if (unlikely(!__pyx_t_3)) __PYX_ERR(1, 844, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_3); + __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0; + __Pyx_Raise(__pyx_t_3, 0, 0, 0); + __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0; + __PYX_ERR(1, 844, __pyx_L1_error) + } + __pyx_L15:; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":845 + * else: + * raise ValueError(u"unknown dtype code in numpy.pxd (%d)" % t) + * f += 1 # <<<<<<<<<<<<<< + * else: + * # Cython ignores struct boundary information ("T{...}"), + */ + __pyx_v_f = (__pyx_v_f + 1); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":820 + * offset[0] += child.itemsize + * + * if not PyDataType_HASFIELDS(child): # <<<<<<<<<<<<<< + * t = child.type_num + * if end - f < 5: + */ + goto __pyx_L13; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":849 + * # Cython ignores struct boundary information ("T{...}"), + * # so don't output it + * f = _util_dtypestring(child, f, end, offset) # <<<<<<<<<<<<<< + * return f + * + */ + /*else*/ { + __pyx_t_9 = __pyx_f_5numpy__util_dtypestring(__pyx_v_child, __pyx_v_f, __pyx_v_end, __pyx_v_offset); if (unlikely(__pyx_t_9 == NULL)) __PYX_ERR(1, 849, __pyx_L1_error) + __pyx_v_f = __pyx_t_9; + } + __pyx_L13:; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":794 + * cdef tuple fields + * + * for childname in descr.names: # <<<<<<<<<<<<<< + * fields = descr.fields[childname] + * child, new_offset = fields + */ + } + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":850 + * # so don't output it + * f = _util_dtypestring(child, f, end, offset) + * return f # <<<<<<<<<<<<<< + * + * + */ + __pyx_r = __pyx_v_f; + goto __pyx_L0; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":785 + * return PyArray_MultiIterNew(5, a, b, c, d, e) + * + * cdef inline char* _util_dtypestring(dtype descr, char* f, char* end, int* offset) except NULL: # <<<<<<<<<<<<<< + * # Recursive utility function used in __getbuffer__ to get format + * # string. The new location in the format string is returned. 
+ */ + + /* function exit code */ + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + __Pyx_XDECREF(__pyx_t_3); + __Pyx_XDECREF(__pyx_t_4); + __Pyx_AddTraceback("numpy._util_dtypestring", __pyx_clineno, __pyx_lineno, __pyx_filename); + __pyx_r = NULL; + __pyx_L0:; + __Pyx_XDECREF((PyObject *)__pyx_v_child); + __Pyx_XDECREF(__pyx_v_fields); + __Pyx_XDECREF(__pyx_v_childname); + __Pyx_XDECREF(__pyx_v_new_offset); + __Pyx_XDECREF(__pyx_v_t); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":966 + * + * + * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<< + * cdef PyObject* baseptr + * if base is None: + */ + +static CYTHON_INLINE void __pyx_f_5numpy_set_array_base(PyArrayObject *__pyx_v_arr, PyObject *__pyx_v_base) { + PyObject *__pyx_v_baseptr; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + int __pyx_t_2; + __Pyx_RefNannySetupContext("set_array_base", 0); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":968 + * cdef inline void set_array_base(ndarray arr, object base): + * cdef PyObject* baseptr + * if base is None: # <<<<<<<<<<<<<< + * baseptr = NULL + * else: + */ + __pyx_t_1 = (__pyx_v_base == Py_None); + __pyx_t_2 = (__pyx_t_1 != 0); + if (__pyx_t_2) { + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":969 + * cdef PyObject* baseptr + * if base is None: + * baseptr = NULL # <<<<<<<<<<<<<< + * else: + * Py_INCREF(base) # important to do this before decref below! + */ + __pyx_v_baseptr = NULL; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":968 + * cdef inline void set_array_base(ndarray arr, object base): + * cdef PyObject* baseptr + * if base is None: # <<<<<<<<<<<<<< + * baseptr = NULL + * else: + */ + goto __pyx_L3; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":971 + * baseptr = NULL + * else: + * Py_INCREF(base) # important to do this before decref below! # <<<<<<<<<<<<<< + * baseptr = base + * Py_XDECREF(arr.base) + */ + /*else*/ { + Py_INCREF(__pyx_v_base); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":972 + * else: + * Py_INCREF(base) # important to do this before decref below! + * baseptr = base # <<<<<<<<<<<<<< + * Py_XDECREF(arr.base) + * arr.base = baseptr + */ + __pyx_v_baseptr = ((PyObject *)__pyx_v_base); + } + __pyx_L3:; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":973 + * Py_INCREF(base) # important to do this before decref below! 
+ * baseptr = base + * Py_XDECREF(arr.base) # <<<<<<<<<<<<<< + * arr.base = baseptr + * + */ + Py_XDECREF(__pyx_v_arr->base); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":974 + * baseptr = base + * Py_XDECREF(arr.base) + * arr.base = baseptr # <<<<<<<<<<<<<< + * + * cdef inline object get_array_base(ndarray arr): + */ + __pyx_v_arr->base = __pyx_v_baseptr; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":966 + * + * + * cdef inline void set_array_base(ndarray arr, object base): # <<<<<<<<<<<<<< + * cdef PyObject* baseptr + * if base is None: + */ + + /* function exit code */ + __Pyx_RefNannyFinishContext(); +} + +/* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":976 + * arr.base = baseptr + * + * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< + * if arr.base is NULL: + * return None + */ + +static CYTHON_INLINE PyObject *__pyx_f_5numpy_get_array_base(PyArrayObject *__pyx_v_arr) { + PyObject *__pyx_r = NULL; + __Pyx_RefNannyDeclarations + int __pyx_t_1; + __Pyx_RefNannySetupContext("get_array_base", 0); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":977 + * + * cdef inline object get_array_base(ndarray arr): + * if arr.base is NULL: # <<<<<<<<<<<<<< + * return None + * else: + */ + __pyx_t_1 = ((__pyx_v_arr->base == NULL) != 0); + if (__pyx_t_1) { + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":978 + * cdef inline object get_array_base(ndarray arr): + * if arr.base is NULL: + * return None # <<<<<<<<<<<<<< + * else: + * return arr.base + */ + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(Py_None); + __pyx_r = Py_None; + goto __pyx_L0; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":977 + * + * cdef inline object get_array_base(ndarray arr): + * if arr.base is NULL: # <<<<<<<<<<<<<< + * return None + * else: + */ + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":980 + * return None + * else: + * return arr.base # <<<<<<<<<<<<<< + */ + /*else*/ { + __Pyx_XDECREF(__pyx_r); + __Pyx_INCREF(((PyObject *)__pyx_v_arr->base)); + __pyx_r = ((PyObject *)__pyx_v_arr->base); + goto __pyx_L0; + } + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":976 + * arr.base = baseptr + * + * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< + * if arr.base is NULL: + * return None + */ + + /* function exit code */ + __pyx_L0:; + __Pyx_XGIVEREF(__pyx_r); + __Pyx_RefNannyFinishContext(); + return __pyx_r; +} + +static PyMethodDef __pyx_methods[] = { + {0, 0, 0, 0} +}; + +#if PY_MAJOR_VERSION >= 3 +static struct PyModuleDef __pyx_moduledef = { + #if PY_VERSION_HEX < 0x03020000 + { PyObject_HEAD_INIT(NULL) NULL, 0, NULL }, + #else + PyModuleDef_HEAD_INIT, + #endif + "gpu_nms", + 0, /* m_doc */ + -1, /* m_size */ + __pyx_methods /* m_methods */, + NULL, /* m_reload */ + NULL, /* m_traverse */ + NULL, /* m_clear */ + NULL /* m_free */ +}; +#endif + +static __Pyx_StringTabEntry __pyx_string_tab[] = { + {&__pyx_kp_s_D_v_zix_caffe_caffe_win_20160523, __pyx_k_D_v_zix_caffe_caffe_win_20160523, sizeof(__pyx_k_D_v_zix_caffe_caffe_win_20160523), 0, 0, 1, 0}, + {&__pyx_kp_u_Format_string_allocated_too_shor, __pyx_k_Format_string_allocated_too_shor, sizeof(__pyx_k_Format_string_allocated_too_shor), 0, 1, 0, 0}, + {&__pyx_kp_u_Format_string_allocated_too_shor_2, __pyx_k_Format_string_allocated_too_shor_2, sizeof(__pyx_k_Format_string_allocated_too_shor_2), 0, 1, 0, 0}, + 
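/* __pyx_string_tab drives one-time creation of every string constant the module uses: __Pyx_InitStrings() (invoked from __Pyx_InitGlobals() below) turns each C literal into a Python string object and stores it in the named global, so later code can reuse e.g. __pyx_n_s_argsort for the "argsort" attribute lookup without rebuilding it. Reading the trailing flag columns as (encoding, is_unicode, is_str, intern) is an interpretation, but it matches the entries here: the kp_u_* error messages are unicode constants, while the n_s_* identifiers are interned strs. */ +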
{&__pyx_kp_u_Non_native_byte_order_not_suppor, __pyx_k_Non_native_byte_order_not_suppor, sizeof(__pyx_k_Non_native_byte_order_not_suppor), 0, 1, 0, 0}, + {&__pyx_n_s_RuntimeError, __pyx_k_RuntimeError, sizeof(__pyx_k_RuntimeError), 0, 0, 1, 1}, + {&__pyx_n_s_ValueError, __pyx_k_ValueError, sizeof(__pyx_k_ValueError), 0, 0, 1, 1}, + {&__pyx_n_s_argsort, __pyx_k_argsort, sizeof(__pyx_k_argsort), 0, 0, 1, 1}, + {&__pyx_n_s_boxes_dim, __pyx_k_boxes_dim, sizeof(__pyx_k_boxes_dim), 0, 0, 1, 1}, + {&__pyx_n_s_boxes_num, __pyx_k_boxes_num, sizeof(__pyx_k_boxes_num), 0, 0, 1, 1}, + {&__pyx_n_s_dets, __pyx_k_dets, sizeof(__pyx_k_dets), 0, 0, 1, 1}, + {&__pyx_n_s_device_id, __pyx_k_device_id, sizeof(__pyx_k_device_id), 0, 0, 1, 1}, + {&__pyx_n_s_dtype, __pyx_k_dtype, sizeof(__pyx_k_dtype), 0, 0, 1, 1}, + {&__pyx_n_s_gpu_nms, __pyx_k_gpu_nms, sizeof(__pyx_k_gpu_nms), 0, 0, 1, 1}, + {&__pyx_n_s_import, __pyx_k_import, sizeof(__pyx_k_import), 0, 0, 1, 1}, + {&__pyx_n_s_int32, __pyx_k_int32, sizeof(__pyx_k_int32), 0, 0, 1, 1}, + {&__pyx_n_s_keep, __pyx_k_keep, sizeof(__pyx_k_keep), 0, 0, 1, 1}, + {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, + {&__pyx_kp_u_ndarray_is_not_C_contiguous, __pyx_k_ndarray_is_not_C_contiguous, sizeof(__pyx_k_ndarray_is_not_C_contiguous), 0, 1, 0, 0}, + {&__pyx_kp_u_ndarray_is_not_Fortran_contiguou, __pyx_k_ndarray_is_not_Fortran_contiguou, sizeof(__pyx_k_ndarray_is_not_Fortran_contiguou), 0, 1, 0, 0}, + {&__pyx_n_s_nms_gpu_nms, __pyx_k_nms_gpu_nms, sizeof(__pyx_k_nms_gpu_nms), 0, 0, 1, 1}, + {&__pyx_n_s_np, __pyx_k_np, sizeof(__pyx_k_np), 0, 0, 1, 1}, + {&__pyx_n_s_num_out, __pyx_k_num_out, sizeof(__pyx_k_num_out), 0, 0, 1, 1}, + {&__pyx_n_s_numpy, __pyx_k_numpy, sizeof(__pyx_k_numpy), 0, 0, 1, 1}, + {&__pyx_n_s_order, __pyx_k_order, sizeof(__pyx_k_order), 0, 0, 1, 1}, + {&__pyx_n_s_range, __pyx_k_range, sizeof(__pyx_k_range), 0, 0, 1, 1}, + {&__pyx_n_s_scores, __pyx_k_scores, sizeof(__pyx_k_scores), 0, 0, 1, 1}, + {&__pyx_n_s_sorted_dets, __pyx_k_sorted_dets, sizeof(__pyx_k_sorted_dets), 0, 0, 1, 1}, + {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, + {&__pyx_n_s_thresh, __pyx_k_thresh, sizeof(__pyx_k_thresh), 0, 0, 1, 1}, + {&__pyx_kp_u_unknown_dtype_code_in_numpy_pxd, __pyx_k_unknown_dtype_code_in_numpy_pxd, sizeof(__pyx_k_unknown_dtype_code_in_numpy_pxd), 0, 1, 0, 0}, + {&__pyx_n_s_zeros, __pyx_k_zeros, sizeof(__pyx_k_zeros), 0, 0, 1, 1}, + {0, 0, 0, 0, 0, 0, 0} +}; +static int __Pyx_InitCachedBuiltins(void) { + __pyx_builtin_ValueError = __Pyx_GetBuiltinName(__pyx_n_s_ValueError); if (!__pyx_builtin_ValueError) __PYX_ERR(1, 218, __pyx_L1_error) + __pyx_builtin_range = __Pyx_GetBuiltinName(__pyx_n_s_range); if (!__pyx_builtin_range) __PYX_ERR(1, 231, __pyx_L1_error) + __pyx_builtin_RuntimeError = __Pyx_GetBuiltinName(__pyx_n_s_RuntimeError); if (!__pyx_builtin_RuntimeError) __PYX_ERR(1, 799, __pyx_L1_error) + return 0; + __pyx_L1_error:; + return -1; +} + +static int __Pyx_InitCachedConstants(void) { + __Pyx_RefNannyDeclarations + __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); + + /* "nms/gpu_nms.pyx":24 + * keep = np.zeros(boxes_num, dtype=np.int32) + * cdef np.ndarray[np.float32_t, ndim=1] \ + * scores = dets[:, 4] # <<<<<<<<<<<<<< + * #cdef np.ndarray[np.int_t, ndim=1] \ // 20160601, by xzn + * # order = scores.argsort()[::-1] + */ + __pyx_slice_ = PySlice_New(Py_None, Py_None, Py_None); if (unlikely(!__pyx_slice_)) __PYX_ERR(0, 24, __pyx_L1_error) + __Pyx_GOTREF(__pyx_slice_); + __Pyx_GIVEREF(__pyx_slice_); + 
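/* __Pyx_InitCachedConstants pre-builds the literal objects nms/gpu_nms.pyx reuses on every call: __pyx_slice_ packed with __pyx_int_4 into __pyx_tuple__2 just below is the index tuple (slice(None), 4) behind "scores = dets[:, 4]", __pyx_slice__3 is slice(None, None, -1) for "order = scores.argsort()[::-1]", and the PyTuple_Pack'd singletons further down hold the pre-built exception-message arguments. Doing this once at import time keeps constant construction off the per-call path. */ +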
__pyx_tuple__2 = PyTuple_Pack(2, __pyx_slice_, __pyx_int_4); if (unlikely(!__pyx_tuple__2)) __PYX_ERR(0, 24, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__2); + __Pyx_GIVEREF(__pyx_tuple__2); + + /* "nms/gpu_nms.pyx":28 + * # order = scores.argsort()[::-1] + * cdef np.ndarray[np.intp_t, ndim=1] \ + * order = scores.argsort()[::-1] # <<<<<<<<<<<<<< + * cdef np.ndarray[np.float32_t, ndim=2] \ + * sorted_dets = dets[order, :] + */ + __pyx_slice__3 = PySlice_New(Py_None, Py_None, __pyx_int_neg_1); if (unlikely(!__pyx_slice__3)) __PYX_ERR(0, 28, __pyx_L1_error) + __Pyx_GOTREF(__pyx_slice__3); + __Pyx_GIVEREF(__pyx_slice__3); + + /* "nms/gpu_nms.pyx":30 + * order = scores.argsort()[::-1] + * cdef np.ndarray[np.float32_t, ndim=2] \ + * sorted_dets = dets[order, :] # <<<<<<<<<<<<<< + * _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) + * keep = keep[:num_out] + */ + __pyx_slice__4 = PySlice_New(Py_None, Py_None, Py_None); if (unlikely(!__pyx_slice__4)) __PYX_ERR(0, 30, __pyx_L1_error) + __Pyx_GOTREF(__pyx_slice__4); + __Pyx_GIVEREF(__pyx_slice__4); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":218 + * if ((flags & pybuf.PyBUF_C_CONTIGUOUS == pybuf.PyBUF_C_CONTIGUOUS) + * and not PyArray_CHKFLAGS(self, NPY_C_CONTIGUOUS)): + * raise ValueError(u"ndarray is not C contiguous") # <<<<<<<<<<<<<< + * + * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) + */ + __pyx_tuple__5 = PyTuple_Pack(1, __pyx_kp_u_ndarray_is_not_C_contiguous); if (unlikely(!__pyx_tuple__5)) __PYX_ERR(1, 218, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__5); + __Pyx_GIVEREF(__pyx_tuple__5); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":222 + * if ((flags & pybuf.PyBUF_F_CONTIGUOUS == pybuf.PyBUF_F_CONTIGUOUS) + * and not PyArray_CHKFLAGS(self, NPY_F_CONTIGUOUS)): + * raise ValueError(u"ndarray is not Fortran contiguous") # <<<<<<<<<<<<<< + * + * info.buf = PyArray_DATA(self) + */ + __pyx_tuple__6 = PyTuple_Pack(1, __pyx_kp_u_ndarray_is_not_Fortran_contiguou); if (unlikely(!__pyx_tuple__6)) __PYX_ERR(1, 222, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__6); + __Pyx_GIVEREF(__pyx_tuple__6); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":259 + * if ((descr.byteorder == c'>' and little_endian) or + * (descr.byteorder == c'<' and not little_endian)): + * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< + * if t == NPY_BYTE: f = "b" + * elif t == NPY_UBYTE: f = "B" + */ + __pyx_tuple__7 = PyTuple_Pack(1, __pyx_kp_u_Non_native_byte_order_not_suppor); if (unlikely(!__pyx_tuple__7)) __PYX_ERR(1, 259, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__7); + __Pyx_GIVEREF(__pyx_tuple__7); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":799 + * + * if (end - f) - (new_offset - offset[0]) < 15: + * raise RuntimeError(u"Format string allocated too short, see comment in numpy.pxd") # <<<<<<<<<<<<<< + * + * if ((child.byteorder == c'>' and little_endian) or + */ + __pyx_tuple__8 = PyTuple_Pack(1, __pyx_kp_u_Format_string_allocated_too_shor); if (unlikely(!__pyx_tuple__8)) __PYX_ERR(1, 799, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__8); + __Pyx_GIVEREF(__pyx_tuple__8); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":803 + * if ((child.byteorder == c'>' and little_endian) or + * (child.byteorder == c'<' and not little_endian)): + * raise ValueError(u"Non-native byte order not supported") # <<<<<<<<<<<<<< + * # One could encode it in the format 
string and have Cython + * # complain instead, BUT: < and > in format strings also imply + */ + __pyx_tuple__9 = PyTuple_Pack(1, __pyx_kp_u_Non_native_byte_order_not_suppor); if (unlikely(!__pyx_tuple__9)) __PYX_ERR(1, 803, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__9); + __Pyx_GIVEREF(__pyx_tuple__9); + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":823 + * t = child.type_num + * if end - f < 5: + * raise RuntimeError(u"Format string allocated too short.") # <<<<<<<<<<<<<< + * + * # Until ticket #99 is fixed, use integers to avoid warnings + */ + __pyx_tuple__10 = PyTuple_Pack(1, __pyx_kp_u_Format_string_allocated_too_shor_2); if (unlikely(!__pyx_tuple__10)) __PYX_ERR(1, 823, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__10); + __Pyx_GIVEREF(__pyx_tuple__10); + + /* "nms/gpu_nms.pyx":16 + * void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) + * + * def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, # <<<<<<<<<<<<<< + * np.int32_t device_id=0): + * cdef int boxes_num = dets.shape[0] + */ + __pyx_tuple__11 = PyTuple_Pack(10, __pyx_n_s_dets, __pyx_n_s_thresh, __pyx_n_s_device_id, __pyx_n_s_boxes_num, __pyx_n_s_boxes_dim, __pyx_n_s_num_out, __pyx_n_s_keep, __pyx_n_s_scores, __pyx_n_s_order, __pyx_n_s_sorted_dets); if (unlikely(!__pyx_tuple__11)) __PYX_ERR(0, 16, __pyx_L1_error) + __Pyx_GOTREF(__pyx_tuple__11); + __Pyx_GIVEREF(__pyx_tuple__11); + __pyx_codeobj__12 = (PyObject*)__Pyx_PyCode_New(3, 0, 10, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_tuple__11, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_D_v_zix_caffe_caffe_win_20160523, __pyx_n_s_gpu_nms, 16, __pyx_empty_bytes); if (unlikely(!__pyx_codeobj__12)) __PYX_ERR(0, 16, __pyx_L1_error) + __Pyx_RefNannyFinishContext(); + return 0; + __pyx_L1_error:; + __Pyx_RefNannyFinishContext(); + return -1; +} + +static int __Pyx_InitGlobals(void) { + if (__Pyx_InitStrings(__pyx_string_tab) < 0) __PYX_ERR(0, 1, __pyx_L1_error); + __pyx_int_4 = PyInt_FromLong(4); if (unlikely(!__pyx_int_4)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_int_neg_1 = PyInt_FromLong(-1); if (unlikely(!__pyx_int_neg_1)) __PYX_ERR(0, 1, __pyx_L1_error) + return 0; + __pyx_L1_error:; + return -1; +} + +#if PY_MAJOR_VERSION < 3 +PyMODINIT_FUNC initgpu_nms(void); /*proto*/ +PyMODINIT_FUNC initgpu_nms(void) +#else +PyMODINIT_FUNC PyInit_gpu_nms(void); /*proto*/ +PyMODINIT_FUNC PyInit_gpu_nms(void) +#endif +{ + PyObject *__pyx_t_1 = NULL; + __Pyx_RefNannyDeclarations + #if CYTHON_REFNANNY + __Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny"); + if (!__Pyx_RefNanny) { + PyErr_Clear(); + __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny"); + if (!__Pyx_RefNanny) + Py_FatalError("failed to import 'refnanny' module"); + } + #endif + __Pyx_RefNannySetupContext("PyMODINIT_FUNC PyInit_gpu_nms(void)", 0); + if (__Pyx_check_binary_version() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_empty_unicode = PyUnicode_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_unicode)) __PYX_ERR(0, 1, __pyx_L1_error) + #ifdef __Pyx_CyFunction_USED + if (__pyx_CyFunction_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_FusedFunction_USED + if (__pyx_FusedFunction_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_Coroutine_USED + if 
(__pyx_Coroutine_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_Generator_USED + if (__pyx_Generator_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + #ifdef __Pyx_StopAsyncIteration_USED + if (__pyx_StopAsyncIteration_init() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + /*--- Library function declarations ---*/ + /*--- Threads initialization code ---*/ + #if defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS + #ifdef WITH_THREAD /* Python build with threading support? */ + PyEval_InitThreads(); + #endif + #endif + /*--- Module creation code ---*/ + #if PY_MAJOR_VERSION < 3 + __pyx_m = Py_InitModule4("gpu_nms", __pyx_methods, 0, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m); + #else + __pyx_m = PyModule_Create(&__pyx_moduledef); + #endif + if (unlikely(!__pyx_m)) __PYX_ERR(0, 1, __pyx_L1_error) + __pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) __PYX_ERR(0, 1, __pyx_L1_error) + Py_INCREF(__pyx_d); + __pyx_b = PyImport_AddModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_b)) __PYX_ERR(0, 1, __pyx_L1_error) + #if CYTHON_COMPILING_IN_PYPY + Py_INCREF(__pyx_b); + #endif + if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) __PYX_ERR(0, 1, __pyx_L1_error); + /*--- Initialize various global constants etc. ---*/ + if (__Pyx_InitGlobals() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) + if (__Pyx_init_sys_getdefaultencoding_params() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + if (__pyx_module_is_main_nms__gpu_nms) { + if (PyObject_SetAttrString(__pyx_m, "__name__", __pyx_n_s_main) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + } + #if PY_MAJOR_VERSION >= 3 + { + PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) __PYX_ERR(0, 1, __pyx_L1_error) + if (!PyDict_GetItemString(modules, "nms.gpu_nms")) { + if (unlikely(PyDict_SetItemString(modules, "nms.gpu_nms", __pyx_m) < 0)) __PYX_ERR(0, 1, __pyx_L1_error) + } + } + #endif + /*--- Builtin init code ---*/ + if (__Pyx_InitCachedBuiltins() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + /*--- Constants init code ---*/ + if (__Pyx_InitCachedConstants() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + /*--- Global init code ---*/ + /*--- Variable export code ---*/ + /*--- Function export code ---*/ + /*--- Type init code ---*/ + /*--- Type import code ---*/ + __pyx_ptype_7cpython_4type_type = __Pyx_ImportType(__Pyx_BUILTIN_MODULE_NAME, "type", + #if CYTHON_COMPILING_IN_PYPY + sizeof(PyTypeObject), + #else + sizeof(PyHeapTypeObject), + #endif + 0); if (unlikely(!__pyx_ptype_7cpython_4type_type)) __PYX_ERR(2, 9, __pyx_L1_error) + __pyx_ptype_5numpy_dtype = __Pyx_ImportType("numpy", "dtype", sizeof(PyArray_Descr), 0); if (unlikely(!__pyx_ptype_5numpy_dtype)) __PYX_ERR(1, 155, __pyx_L1_error) + __pyx_ptype_5numpy_flatiter = __Pyx_ImportType("numpy", "flatiter", sizeof(PyArrayIterObject), 0); if (unlikely(!__pyx_ptype_5numpy_flatiter)) __PYX_ERR(1, 168, __pyx_L1_error) + __pyx_ptype_5numpy_broadcast = __Pyx_ImportType("numpy", "broadcast", sizeof(PyArrayMultiIterObject), 0); if (unlikely(!__pyx_ptype_5numpy_broadcast)) __PYX_ERR(1, 172, __pyx_L1_error) + __pyx_ptype_5numpy_ndarray = __Pyx_ImportType("numpy", "ndarray", sizeof(PyArrayObject), 0); if (unlikely(!__pyx_ptype_5numpy_ndarray)) __PYX_ERR(1, 181, __pyx_L1_error) + __pyx_ptype_5numpy_ufunc = __Pyx_ImportType("numpy", "ufunc", sizeof(PyUFuncObject), 0); if (unlikely(!__pyx_ptype_5numpy_ufunc)) __PYX_ERR(1, 861, __pyx_L1_error) + /*--- 
Variable import code ---*/ + /*--- Function import code ---*/ + /*--- Execution code ---*/ + #if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) + if (__Pyx_patch_abc() < 0) __PYX_ERR(0, 1, __pyx_L1_error) + #endif + + /* "nms/gpu_nms.pyx":8 + * # -------------------------------------------------------- + * + * import numpy as np # <<<<<<<<<<<<<< + * cimport numpy as np + * + */ + __pyx_t_1 = __Pyx_Import(__pyx_n_s_numpy, 0, -1); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 8, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_np, __pyx_t_1) < 0) __PYX_ERR(0, 8, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "nms/gpu_nms.pyx":11 + * cimport numpy as np + * + * assert sizeof(int) == sizeof(np.int32_t) # <<<<<<<<<<<<<< + * + * cdef extern from "gpu_nms.hpp": + */ + #ifndef CYTHON_WITHOUT_ASSERTIONS + if (unlikely(!Py_OptimizeFlag)) { + if (unlikely(!(((sizeof(int)) == (sizeof(__pyx_t_5numpy_int32_t))) != 0))) { + PyErr_SetNone(PyExc_AssertionError); + __PYX_ERR(0, 11, __pyx_L1_error) + } + } + #endif + + /* "nms/gpu_nms.pyx":16 + * void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) + * + * def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, # <<<<<<<<<<<<<< + * np.int32_t device_id=0): + * cdef int boxes_num = dets.shape[0] + */ + __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_3nms_7gpu_nms_1gpu_nms, NULL, __pyx_n_s_nms_gpu_nms); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 16, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_gpu_nms, __pyx_t_1) < 0) __PYX_ERR(0, 16, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "nms/gpu_nms.pyx":1 + * # -------------------------------------------------------- # <<<<<<<<<<<<<< + * # Faster R-CNN + * # Copyright (c) 2015 Microsoft + */ + __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_GOTREF(__pyx_t_1); + if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) __PYX_ERR(0, 1, __pyx_L1_error) + __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; + + /* "C:/Anaconda2/lib/site-packages/Cython/Includes/numpy/__init__.pxd":976 + * arr.base = baseptr + * + * cdef inline object get_array_base(ndarray arr): # <<<<<<<<<<<<<< + * if arr.base is NULL: + * return None + */ + + /*--- Wrapped vars code ---*/ + + goto __pyx_L0; + __pyx_L1_error:; + __Pyx_XDECREF(__pyx_t_1); + if (__pyx_m) { + if (__pyx_d) { + __Pyx_AddTraceback("init nms.gpu_nms", __pyx_clineno, __pyx_lineno, __pyx_filename); + } + Py_DECREF(__pyx_m); __pyx_m = 0; + } else if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_ImportError, "init nms.gpu_nms"); + } + __pyx_L0:; + __Pyx_RefNannyFinishContext(); + #if PY_MAJOR_VERSION < 3 + return; + #else + return __pyx_m; + #endif +} + +/* --- Runtime support code --- */ +/* Refnanny */ +#if CYTHON_REFNANNY +static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) { + PyObject *m = NULL, *p = NULL; + void *r = NULL; + m = PyImport_ImportModule((char *)modname); + if (!m) goto end; + p = PyObject_GetAttrString(m, (char *)"RefNannyAPI"); + if (!p) goto end; + r = PyLong_AsVoidPtr(p); +end: + Py_XDECREF(p); + Py_XDECREF(m); + return (__Pyx_RefNannyAPIStruct *)r; +} +#endif + +/* RaiseArgTupleInvalid */ +static void __Pyx_RaiseArgtupleInvalid( + const char* func_name, + int exact, + Py_ssize_t num_min, + Py_ssize_t num_max, + Py_ssize_t num_found) +{ + Py_ssize_t num_expected; + const char *more_or_less; + if (num_found < num_min) { + num_expected = num_min; + more_or_less = "at 
least"; + } else { + num_expected = num_max; + more_or_less = "at most"; + } + if (exact) { + more_or_less = "exactly"; + } + PyErr_Format(PyExc_TypeError, + "%.200s() takes %.8s %" CYTHON_FORMAT_SSIZE_T "d positional argument%.1s (%" CYTHON_FORMAT_SSIZE_T "d given)", + func_name, more_or_less, num_expected, + (num_expected == 1) ? "" : "s", num_found); +} + +/* RaiseDoubleKeywords */ +static void __Pyx_RaiseDoubleKeywordsError( + const char* func_name, + PyObject* kw_name) +{ + PyErr_Format(PyExc_TypeError, + #if PY_MAJOR_VERSION >= 3 + "%s() got multiple values for keyword argument '%U'", func_name, kw_name); + #else + "%s() got multiple values for keyword argument '%s'", func_name, + PyString_AsString(kw_name)); + #endif +} + +/* ParseKeywords */ +static int __Pyx_ParseOptionalKeywords( + PyObject *kwds, + PyObject **argnames[], + PyObject *kwds2, + PyObject *values[], + Py_ssize_t num_pos_args, + const char* function_name) +{ + PyObject *key = 0, *value = 0; + Py_ssize_t pos = 0; + PyObject*** name; + PyObject*** first_kw_arg = argnames + num_pos_args; + while (PyDict_Next(kwds, &pos, &key, &value)) { + name = first_kw_arg; + while (*name && (**name != key)) name++; + if (*name) { + values[name-argnames] = value; + continue; + } + name = first_kw_arg; + #if PY_MAJOR_VERSION < 3 + if (likely(PyString_CheckExact(key)) || likely(PyString_Check(key))) { + while (*name) { + if ((CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**name) == PyString_GET_SIZE(key)) + && _PyString_Eq(**name, key)) { + values[name-argnames] = value; + break; + } + name++; + } + if (*name) continue; + else { + PyObject*** argname = argnames; + while (argname != first_kw_arg) { + if ((**argname == key) || ( + (CYTHON_COMPILING_IN_PYPY || PyString_GET_SIZE(**argname) == PyString_GET_SIZE(key)) + && _PyString_Eq(**argname, key))) { + goto arg_passed_twice; + } + argname++; + } + } + } else + #endif + if (likely(PyUnicode_Check(key))) { + while (*name) { + int cmp = (**name == key) ? 0 : + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 + (PyUnicode_GET_SIZE(**name) != PyUnicode_GET_SIZE(key)) ? 1 : + #endif + PyUnicode_Compare(**name, key); + if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; + if (cmp == 0) { + values[name-argnames] = value; + break; + } + name++; + } + if (*name) continue; + else { + PyObject*** argname = argnames; + while (argname != first_kw_arg) { + int cmp = (**argname == key) ? 0 : + #if !CYTHON_COMPILING_IN_PYPY && PY_MAJOR_VERSION >= 3 + (PyUnicode_GET_SIZE(**argname) != PyUnicode_GET_SIZE(key)) ? 
1 : + #endif + PyUnicode_Compare(**argname, key); + if (cmp < 0 && unlikely(PyErr_Occurred())) goto bad; + if (cmp == 0) goto arg_passed_twice; + argname++; + } + } + } else + goto invalid_keyword_type; + if (kwds2) { + if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad; + } else { + goto invalid_keyword; + } + } + return 0; +arg_passed_twice: + __Pyx_RaiseDoubleKeywordsError(function_name, key); + goto bad; +invalid_keyword_type: + PyErr_Format(PyExc_TypeError, + "%.200s() keywords must be strings", function_name); + goto bad; +invalid_keyword: + PyErr_Format(PyExc_TypeError, + #if PY_MAJOR_VERSION < 3 + "%.200s() got an unexpected keyword argument '%.200s'", + function_name, PyString_AsString(key)); + #else + "%s() got an unexpected keyword argument '%U'", + function_name, key); + #endif +bad: + return -1; +} + +/* ArgTypeTest */ +static void __Pyx_RaiseArgumentTypeInvalid(const char* name, PyObject *obj, PyTypeObject *type) { + PyErr_Format(PyExc_TypeError, + "Argument '%.200s' has incorrect type (expected %.200s, got %.200s)", + name, type->tp_name, Py_TYPE(obj)->tp_name); +} +static CYTHON_INLINE int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed, + const char *name, int exact) +{ + if (unlikely(!type)) { + PyErr_SetString(PyExc_SystemError, "Missing type object"); + return 0; + } + if (none_allowed && obj == Py_None) return 1; + else if (exact) { + if (likely(Py_TYPE(obj) == type)) return 1; + #if PY_MAJOR_VERSION == 2 + else if ((type == &PyBaseString_Type) && likely(__Pyx_PyBaseString_CheckExact(obj))) return 1; + #endif + } + else { + if (likely(PyObject_TypeCheck(obj, type))) return 1; + } + __Pyx_RaiseArgumentTypeInvalid(name, obj, type); + return 0; +} + +/* BufferFormatCheck */ +static CYTHON_INLINE int __Pyx_IsLittleEndian(void) { + unsigned int n = 1; + return *(unsigned char*)(&n) != 0; +} +static void __Pyx_BufFmt_Init(__Pyx_BufFmt_Context* ctx, + __Pyx_BufFmt_StackElem* stack, + __Pyx_TypeInfo* type) { + stack[0].field = &ctx->root; + stack[0].parent_offset = 0; + ctx->root.type = type; + ctx->root.name = "buffer dtype"; + ctx->root.offset = 0; + ctx->head = stack; + ctx->head->field = &ctx->root; + ctx->fmt_offset = 0; + ctx->head->parent_offset = 0; + ctx->new_packmode = '@'; + ctx->enc_packmode = '@'; + ctx->new_count = 1; + ctx->enc_count = 0; + ctx->enc_type = 0; + ctx->is_complex = 0; + ctx->is_valid_array = 0; + ctx->struct_alignment = 0; + while (type->typegroup == 'S') { + ++ctx->head; + ctx->head->field = type->fields; + ctx->head->parent_offset = 0; + type = type->fields->type; + } +} +static int __Pyx_BufFmt_ParseNumber(const char** ts) { + int count; + const char* t = *ts; + if (*t < '0' || *t > '9') { + return -1; + } else { + count = *t++ - '0'; + while (*t >= '0' && *t < '9') { + count *= 10; + count += *t++ - '0'; + } + } + *ts = t; + return count; +} +static int __Pyx_BufFmt_ExpectNumber(const char **ts) { + int number = __Pyx_BufFmt_ParseNumber(ts); + if (number == -1) + PyErr_Format(PyExc_ValueError,\ + "Does not understand character buffer dtype format string ('%c')", **ts); + return number; +} +static void __Pyx_BufFmt_RaiseUnexpectedChar(char ch) { + PyErr_Format(PyExc_ValueError, + "Unexpected format string character: '%c'", ch); +} +static const char* __Pyx_BufFmt_DescribeTypeChar(char ch, int is_complex) { + switch (ch) { + case 'c': return "'char'"; + case 'b': return "'signed char'"; + case 'B': return "'unsigned char'"; + case 'h': return "'short'"; + case 'H': return "'unsigned short'"; + case 'i': return 
"'int'"; + case 'I': return "'unsigned int'"; + case 'l': return "'long'"; + case 'L': return "'unsigned long'"; + case 'q': return "'long long'"; + case 'Q': return "'unsigned long long'"; + case 'f': return (is_complex ? "'complex float'" : "'float'"); + case 'd': return (is_complex ? "'complex double'" : "'double'"); + case 'g': return (is_complex ? "'complex long double'" : "'long double'"); + case 'T': return "a struct"; + case 'O': return "Python object"; + case 'P': return "a pointer"; + case 's': case 'p': return "a string"; + case 0: return "end"; + default: return "unparseable format string"; + } +} +static size_t __Pyx_BufFmt_TypeCharToStandardSize(char ch, int is_complex) { + switch (ch) { + case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; + case 'h': case 'H': return 2; + case 'i': case 'I': case 'l': case 'L': return 4; + case 'q': case 'Q': return 8; + case 'f': return (is_complex ? 8 : 4); + case 'd': return (is_complex ? 16 : 8); + case 'g': { + PyErr_SetString(PyExc_ValueError, "Python does not define a standard format string size for long double ('g').."); + return 0; + } + case 'O': case 'P': return sizeof(void*); + default: + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } +} +static size_t __Pyx_BufFmt_TypeCharToNativeSize(char ch, int is_complex) { + switch (ch) { + case 'c': case 'b': case 'B': case 's': case 'p': return 1; + case 'h': case 'H': return sizeof(short); + case 'i': case 'I': return sizeof(int); + case 'l': case 'L': return sizeof(long); + #ifdef HAVE_LONG_LONG + case 'q': case 'Q': return sizeof(PY_LONG_LONG); + #endif + case 'f': return sizeof(float) * (is_complex ? 2 : 1); + case 'd': return sizeof(double) * (is_complex ? 2 : 1); + case 'g': return sizeof(long double) * (is_complex ? 2 : 1); + case 'O': case 'P': return sizeof(void*); + default: { + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } + } +} +typedef struct { char c; short x; } __Pyx_st_short; +typedef struct { char c; int x; } __Pyx_st_int; +typedef struct { char c; long x; } __Pyx_st_long; +typedef struct { char c; float x; } __Pyx_st_float; +typedef struct { char c; double x; } __Pyx_st_double; +typedef struct { char c; long double x; } __Pyx_st_longdouble; +typedef struct { char c; void *x; } __Pyx_st_void_p; +#ifdef HAVE_LONG_LONG +typedef struct { char c; PY_LONG_LONG x; } __Pyx_st_longlong; +#endif +static size_t __Pyx_BufFmt_TypeCharToAlignment(char ch, CYTHON_UNUSED int is_complex) { + switch (ch) { + case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; + case 'h': case 'H': return sizeof(__Pyx_st_short) - sizeof(short); + case 'i': case 'I': return sizeof(__Pyx_st_int) - sizeof(int); + case 'l': case 'L': return sizeof(__Pyx_st_long) - sizeof(long); +#ifdef HAVE_LONG_LONG + case 'q': case 'Q': return sizeof(__Pyx_st_longlong) - sizeof(PY_LONG_LONG); +#endif + case 'f': return sizeof(__Pyx_st_float) - sizeof(float); + case 'd': return sizeof(__Pyx_st_double) - sizeof(double); + case 'g': return sizeof(__Pyx_st_longdouble) - sizeof(long double); + case 'P': case 'O': return sizeof(__Pyx_st_void_p) - sizeof(void*); + default: + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } +} +/* These are for computing the padding at the end of the struct to align + on the first member of the struct. This will probably the same as above, + but we don't have any guarantees. 
+ */ +typedef struct { short x; char c; } __Pyx_pad_short; +typedef struct { int x; char c; } __Pyx_pad_int; +typedef struct { long x; char c; } __Pyx_pad_long; +typedef struct { float x; char c; } __Pyx_pad_float; +typedef struct { double x; char c; } __Pyx_pad_double; +typedef struct { long double x; char c; } __Pyx_pad_longdouble; +typedef struct { void *x; char c; } __Pyx_pad_void_p; +#ifdef HAVE_LONG_LONG +typedef struct { PY_LONG_LONG x; char c; } __Pyx_pad_longlong; +#endif +static size_t __Pyx_BufFmt_TypeCharToPadding(char ch, CYTHON_UNUSED int is_complex) { + switch (ch) { + case '?': case 'c': case 'b': case 'B': case 's': case 'p': return 1; + case 'h': case 'H': return sizeof(__Pyx_pad_short) - sizeof(short); + case 'i': case 'I': return sizeof(__Pyx_pad_int) - sizeof(int); + case 'l': case 'L': return sizeof(__Pyx_pad_long) - sizeof(long); +#ifdef HAVE_LONG_LONG + case 'q': case 'Q': return sizeof(__Pyx_pad_longlong) - sizeof(PY_LONG_LONG); +#endif + case 'f': return sizeof(__Pyx_pad_float) - sizeof(float); + case 'd': return sizeof(__Pyx_pad_double) - sizeof(double); + case 'g': return sizeof(__Pyx_pad_longdouble) - sizeof(long double); + case 'P': case 'O': return sizeof(__Pyx_pad_void_p) - sizeof(void*); + default: + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } +} +static char __Pyx_BufFmt_TypeCharToGroup(char ch, int is_complex) { + switch (ch) { + case 'c': + return 'H'; + case 'b': case 'h': case 'i': + case 'l': case 'q': case 's': case 'p': + return 'I'; + case 'B': case 'H': case 'I': case 'L': case 'Q': + return 'U'; + case 'f': case 'd': case 'g': + return (is_complex ? 'C' : 'R'); + case 'O': + return 'O'; + case 'P': + return 'P'; + default: { + __Pyx_BufFmt_RaiseUnexpectedChar(ch); + return 0; + } + } +} +static void __Pyx_BufFmt_RaiseExpected(__Pyx_BufFmt_Context* ctx) { + if (ctx->head == NULL || ctx->head->field == &ctx->root) { + const char* expected; + const char* quote; + if (ctx->head == NULL) { + expected = "end"; + quote = ""; + } else { + expected = ctx->head->field->type->name; + quote = "'"; + } + PyErr_Format(PyExc_ValueError, + "Buffer dtype mismatch, expected %s%s%s but got %s", + quote, expected, quote, + __Pyx_BufFmt_DescribeTypeChar(ctx->enc_type, ctx->is_complex)); + } else { + __Pyx_StructField* field = ctx->head->field; + __Pyx_StructField* parent = (ctx->head - 1)->field; + PyErr_Format(PyExc_ValueError, + "Buffer dtype mismatch, expected '%s' but got %s in '%s.%s'", + field->type->name, __Pyx_BufFmt_DescribeTypeChar(ctx->enc_type, ctx->is_complex), + parent->type->name, field->name); + } +} +static int __Pyx_BufFmt_ProcessTypeChunk(__Pyx_BufFmt_Context* ctx) { + char group; + size_t size, offset, arraysize = 1; + if (ctx->enc_type == 0) return 0; + if (ctx->head->field->type->arraysize[0]) { + int i, ndim = 0; + if (ctx->enc_type == 's' || ctx->enc_type == 'p') { + ctx->is_valid_array = ctx->head->field->type->ndim == 1; + ndim = 1; + if (ctx->enc_count != ctx->head->field->type->arraysize[0]) { + PyErr_Format(PyExc_ValueError, + "Expected a dimension of size %zu, got %zu", + ctx->head->field->type->arraysize[0], ctx->enc_count); + return -1; + } + } + if (!ctx->is_valid_array) { + PyErr_Format(PyExc_ValueError, "Expected %d dimensions, got %d", + ctx->head->field->type->ndim, ndim); + return -1; + } + for (i = 0; i < ctx->head->field->type->ndim; i++) { + arraysize *= ctx->head->field->type->arraysize[i]; + } + ctx->is_valid_array = 0; + ctx->enc_count = 1; + } + group = __Pyx_BufFmt_TypeCharToGroup(ctx->enc_type, 
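/* Illustrative aside on the __Pyx_st_* / __Pyx_pad_* probes above: wrapping a
 * type T in struct {char c; T x;} makes sizeof(struct) - sizeof(T) equal the
 * alignment the compiler enforces for T, and struct {T x; char c;} exposes
 * the tail padding, all in portable C with no compiler extensions. A
 * standalone sketch, with double as the example type:
 */
#include <stdio.h>

typedef struct { char c; double x; } st_double;   /* mirrors __Pyx_st_double */
typedef struct { double x; char c; } pad_double;  /* mirrors __Pyx_pad_double */

int main(void)
{
    /* Leading padding before x gives the alignment requirement of double. */
    size_t align = sizeof(st_double) - sizeof(double);
    /* The char member plus tail padding, as used for struct-end alignment. */
    size_t pad = sizeof(pad_double) - sizeof(double);
    printf("double: alignment = %zu, tail probe = %zu\n", align, pad);
    return 0;
}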
ctx->is_complex); + do { + __Pyx_StructField* field = ctx->head->field; + __Pyx_TypeInfo* type = field->type; + if (ctx->enc_packmode == '@' || ctx->enc_packmode == '^') { + size = __Pyx_BufFmt_TypeCharToNativeSize(ctx->enc_type, ctx->is_complex); + } else { + size = __Pyx_BufFmt_TypeCharToStandardSize(ctx->enc_type, ctx->is_complex); + } + if (ctx->enc_packmode == '@') { + size_t align_at = __Pyx_BufFmt_TypeCharToAlignment(ctx->enc_type, ctx->is_complex); + size_t align_mod_offset; + if (align_at == 0) return -1; + align_mod_offset = ctx->fmt_offset % align_at; + if (align_mod_offset > 0) ctx->fmt_offset += align_at - align_mod_offset; + if (ctx->struct_alignment == 0) + ctx->struct_alignment = __Pyx_BufFmt_TypeCharToPadding(ctx->enc_type, + ctx->is_complex); + } + if (type->size != size || type->typegroup != group) { + if (type->typegroup == 'C' && type->fields != NULL) { + size_t parent_offset = ctx->head->parent_offset + field->offset; + ++ctx->head; + ctx->head->field = type->fields; + ctx->head->parent_offset = parent_offset; + continue; + } + if ((type->typegroup == 'H' || group == 'H') && type->size == size) { + } else { + __Pyx_BufFmt_RaiseExpected(ctx); + return -1; + } + } + offset = ctx->head->parent_offset + field->offset; + if (ctx->fmt_offset != offset) { + PyErr_Format(PyExc_ValueError, + "Buffer dtype mismatch; next field is at offset %" CYTHON_FORMAT_SSIZE_T "d but %" CYTHON_FORMAT_SSIZE_T "d expected", + (Py_ssize_t)ctx->fmt_offset, (Py_ssize_t)offset); + return -1; + } + ctx->fmt_offset += size; + if (arraysize) + ctx->fmt_offset += (arraysize - 1) * size; + --ctx->enc_count; + while (1) { + if (field == &ctx->root) { + ctx->head = NULL; + if (ctx->enc_count != 0) { + __Pyx_BufFmt_RaiseExpected(ctx); + return -1; + } + break; + } + ctx->head->field = ++field; + if (field->type == NULL) { + --ctx->head; + field = ctx->head->field; + continue; + } else if (field->type->typegroup == 'S') { + size_t parent_offset = ctx->head->parent_offset + field->offset; + if (field->type->fields->type == NULL) continue; + field = field->type->fields; + ++ctx->head; + ctx->head->field = field; + ctx->head->parent_offset = parent_offset; + break; + } else { + break; + } + } + } while (ctx->enc_count); + ctx->enc_type = 0; + ctx->is_complex = 0; + return 0; +} +static CYTHON_INLINE PyObject * +__pyx_buffmt_parse_array(__Pyx_BufFmt_Context* ctx, const char** tsp) +{ + const char *ts = *tsp; + int i = 0, number; + int ndim = ctx->head->field->type->ndim; +; + ++ts; + if (ctx->new_count != 1) { + PyErr_SetString(PyExc_ValueError, + "Cannot handle repeated arrays in format string"); + return NULL; + } + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + while (*ts && *ts != ')') { + switch (*ts) { + case ' ': case '\f': case '\r': case '\n': case '\t': case '\v': continue; + default: break; + } + number = __Pyx_BufFmt_ExpectNumber(&ts); + if (number == -1) return NULL; + if (i < ndim && (size_t) number != ctx->head->field->type->arraysize[i]) + return PyErr_Format(PyExc_ValueError, + "Expected a dimension of size %zu, got %d", + ctx->head->field->type->arraysize[i], number); + if (*ts != ',' && *ts != ')') + return PyErr_Format(PyExc_ValueError, + "Expected a comma in format string, got '%c'", *ts); + if (*ts == ',') ts++; + i++; + } + if (i != ndim) + return PyErr_Format(PyExc_ValueError, "Expected %d dimension(s), got %d", + ctx->head->field->type->ndim, i); + if (!*ts) { + PyErr_SetString(PyExc_ValueError, + "Unexpected end of format string, expected ')'"); + return NULL; + } + 
ctx->is_valid_array = 1; + ctx->new_count = 1; + *tsp = ++ts; + return Py_None; +} +static const char* __Pyx_BufFmt_CheckString(__Pyx_BufFmt_Context* ctx, const char* ts) { + int got_Z = 0; + while (1) { + switch(*ts) { + case 0: + if (ctx->enc_type != 0 && ctx->head == NULL) { + __Pyx_BufFmt_RaiseExpected(ctx); + return NULL; + } + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + if (ctx->head != NULL) { + __Pyx_BufFmt_RaiseExpected(ctx); + return NULL; + } + return ts; + case ' ': + case '\r': + case '\n': + ++ts; + break; + case '<': + if (!__Pyx_IsLittleEndian()) { + PyErr_SetString(PyExc_ValueError, "Little-endian buffer not supported on big-endian compiler"); + return NULL; + } + ctx->new_packmode = '='; + ++ts; + break; + case '>': + case '!': + if (__Pyx_IsLittleEndian()) { + PyErr_SetString(PyExc_ValueError, "Big-endian buffer not supported on little-endian compiler"); + return NULL; + } + ctx->new_packmode = '='; + ++ts; + break; + case '=': + case '@': + case '^': + ctx->new_packmode = *ts++; + break; + case 'T': + { + const char* ts_after_sub; + size_t i, struct_count = ctx->new_count; + size_t struct_alignment = ctx->struct_alignment; + ctx->new_count = 1; + ++ts; + if (*ts != '{') { + PyErr_SetString(PyExc_ValueError, "Buffer acquisition: Expected '{' after 'T'"); + return NULL; + } + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + ctx->enc_type = 0; + ctx->enc_count = 0; + ctx->struct_alignment = 0; + ++ts; + ts_after_sub = ts; + for (i = 0; i != struct_count; ++i) { + ts_after_sub = __Pyx_BufFmt_CheckString(ctx, ts); + if (!ts_after_sub) return NULL; + } + ts = ts_after_sub; + if (struct_alignment) ctx->struct_alignment = struct_alignment; + } + break; + case '}': + { + size_t alignment = ctx->struct_alignment; + ++ts; + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + ctx->enc_type = 0; + if (alignment && ctx->fmt_offset % alignment) { + ctx->fmt_offset += alignment - (ctx->fmt_offset % alignment); + } + } + return ts; + case 'x': + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + ctx->fmt_offset += ctx->new_count; + ctx->new_count = 1; + ctx->enc_count = 0; + ctx->enc_type = 0; + ctx->enc_packmode = ctx->new_packmode; + ++ts; + break; + case 'Z': + got_Z = 1; + ++ts; + if (*ts != 'f' && *ts != 'd' && *ts != 'g') { + __Pyx_BufFmt_RaiseUnexpectedChar('Z'); + return NULL; + } + case 'c': case 'b': case 'B': case 'h': case 'H': case 'i': case 'I': + case 'l': case 'L': case 'q': case 'Q': + case 'f': case 'd': case 'g': + case 'O': case 'p': + if (ctx->enc_type == *ts && got_Z == ctx->is_complex && + ctx->enc_packmode == ctx->new_packmode) { + ctx->enc_count += ctx->new_count; + ctx->new_count = 1; + got_Z = 0; + ++ts; + break; + } + case 's': + if (__Pyx_BufFmt_ProcessTypeChunk(ctx) == -1) return NULL; + ctx->enc_count = ctx->new_count; + ctx->enc_packmode = ctx->new_packmode; + ctx->enc_type = *ts; + ctx->is_complex = got_Z; + ++ts; + ctx->new_count = 1; + got_Z = 0; + break; + case ':': + ++ts; + while(*ts != ':') ++ts; + ++ts; + break; + case '(': + if (!__pyx_buffmt_parse_array(ctx, &ts)) return NULL; + break; + default: + { + int number = __Pyx_BufFmt_ExpectNumber(&ts); + if (number == -1) return NULL; + ctx->new_count = (size_t)number; + } + } + } +} +static CYTHON_INLINE void __Pyx_ZeroBuffer(Py_buffer* buf) { + buf->buf = NULL; + buf->obj = NULL; + buf->strides = __Pyx_zeros; + buf->shape = __Pyx_zeros; + buf->suboffsets = __Pyx_minusones; +} +static CYTHON_INLINE int __Pyx_GetBufferAndValidate( + Py_buffer* buf, 
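/* Illustrative aside: the strings parsed by __Pyx_BufFmt_CheckString above
 * are PEP 3118 format specs taken from Py_buffer.format — e.g. "f" for a
 * float32 array, "Zd" for complex128, "T{...}" for a struct dtype. A minimal
 * probe that prints what an exporter hands this parser:
 */
#include <Python.h>

static void print_buffer_format(PyObject *obj)
{
    Py_buffer view;
    if (PyObject_GetBuffer(obj, &view, PyBUF_FORMAT | PyBUF_ND) == 0) {
        printf("format=%s ndim=%d itemsize=%zd\n",
               view.format ? view.format : "B", view.ndim, view.itemsize);
        PyBuffer_Release(&view);
    }
}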
PyObject* obj, __Pyx_TypeInfo* dtype, int flags, + int nd, int cast, __Pyx_BufFmt_StackElem* stack) +{ + if (obj == Py_None || obj == NULL) { + __Pyx_ZeroBuffer(buf); + return 0; + } + buf->buf = NULL; + if (__Pyx_GetBuffer(obj, buf, flags) == -1) goto fail; + if (buf->ndim != nd) { + PyErr_Format(PyExc_ValueError, + "Buffer has wrong number of dimensions (expected %d, got %d)", + nd, buf->ndim); + goto fail; + } + if (!cast) { + __Pyx_BufFmt_Context ctx; + __Pyx_BufFmt_Init(&ctx, stack, dtype); + if (!__Pyx_BufFmt_CheckString(&ctx, buf->format)) goto fail; + } + if ((unsigned)buf->itemsize != dtype->size) { + PyErr_Format(PyExc_ValueError, + "Item size of buffer (%" CYTHON_FORMAT_SSIZE_T "d byte%s) does not match size of '%s' (%" CYTHON_FORMAT_SSIZE_T "d byte%s)", + buf->itemsize, (buf->itemsize > 1) ? "s" : "", + dtype->name, (Py_ssize_t)dtype->size, (dtype->size > 1) ? "s" : ""); + goto fail; + } + if (buf->suboffsets == NULL) buf->suboffsets = __Pyx_minusones; + return 0; +fail:; + __Pyx_ZeroBuffer(buf); + return -1; +} +static CYTHON_INLINE void __Pyx_SafeReleaseBuffer(Py_buffer* info) { + if (info->buf == NULL) return; + if (info->suboffsets == __Pyx_minusones) info->suboffsets = NULL; + __Pyx_ReleaseBuffer(info); +} + +/* GetBuiltinName */ + static PyObject *__Pyx_GetBuiltinName(PyObject *name) { + PyObject* result = __Pyx_PyObject_GetAttrStr(__pyx_b, name); + if (unlikely(!result)) { + PyErr_Format(PyExc_NameError, +#if PY_MAJOR_VERSION >= 3 + "name '%U' is not defined", name); +#else + "name '%.200s' is not defined", PyString_AS_STRING(name)); +#endif + } + return result; +} + +/* GetModuleGlobalName */ + static CYTHON_INLINE PyObject *__Pyx_GetModuleGlobalName(PyObject *name) { + PyObject *result; +#if CYTHON_COMPILING_IN_CPYTHON + result = PyDict_GetItem(__pyx_d, name); + if (likely(result)) { + Py_INCREF(result); + } else { +#else + result = PyObject_GetItem(__pyx_d, name); + if (!result) { + PyErr_Clear(); +#endif + result = __Pyx_GetBuiltinName(name); + } + return result; +} + +/* PyObjectCall */ + #if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg, PyObject *kw) { + PyObject *result; + ternaryfunc call = func->ob_type->tp_call; + if (unlikely(!call)) + return PyObject_Call(func, arg, kw); + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; + result = (*call)(func, arg, kw); + Py_LeaveRecursiveCall(); + if (unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + "NULL result without error in PyObject_Call"); + } + return result; +} +#endif + +/* ExtTypeTest */ + static CYTHON_INLINE int __Pyx_TypeTest(PyObject *obj, PyTypeObject *type) { + if (unlikely(!type)) { + PyErr_SetString(PyExc_SystemError, "Missing type object"); + return 0; + } + if (likely(PyObject_TypeCheck(obj, type))) + return 1; + PyErr_Format(PyExc_TypeError, "Cannot convert %.200s to %.200s", + Py_TYPE(obj)->tp_name, type->tp_name); + return 0; +} + +/* PyObjectCallMethO */ + #if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallMethO(PyObject *func, PyObject *arg) { + PyObject *self, *result; + PyCFunction cfunc; + cfunc = PyCFunction_GET_FUNCTION(func); + self = PyCFunction_GET_SELF(func); + if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object"))) + return NULL; + result = cfunc(self, arg); + Py_LeaveRecursiveCall(); + if (unlikely(!result) && unlikely(!PyErr_Occurred())) { + PyErr_SetString( + PyExc_SystemError, + 
"NULL result without error in PyObject_Call"); + } + return result; +} +#endif + +/* PyObjectCallOneArg */ + #if CYTHON_COMPILING_IN_CPYTHON +static PyObject* __Pyx__PyObject_CallOneArg(PyObject *func, PyObject *arg) { + PyObject *result; + PyObject *args = PyTuple_New(1); + if (unlikely(!args)) return NULL; + Py_INCREF(arg); + PyTuple_SET_ITEM(args, 0, arg); + result = __Pyx_PyObject_Call(func, args, NULL); + Py_DECREF(args); + return result; +} +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { +#ifdef __Pyx_CyFunction_USED + if (likely(PyCFunction_Check(func) || PyObject_TypeCheck(func, __pyx_CyFunctionType))) { +#else + if (likely(PyCFunction_Check(func))) { +#endif + if (likely(PyCFunction_GET_FLAGS(func) & METH_O)) { + return __Pyx_PyObject_CallMethO(func, arg); + } + } + return __Pyx__PyObject_CallOneArg(func, arg); +} +#else +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) { + PyObject *result; + PyObject *args = PyTuple_Pack(1, arg); + if (unlikely(!args)) return NULL; + result = __Pyx_PyObject_Call(func, args, NULL); + Py_DECREF(args); + return result; +} +#endif + +/* PyObjectCallNoArg */ + #if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func) { +#ifdef __Pyx_CyFunction_USED + if (likely(PyCFunction_Check(func) || PyObject_TypeCheck(func, __pyx_CyFunctionType))) { +#else + if (likely(PyCFunction_Check(func))) { +#endif + if (likely(PyCFunction_GET_FLAGS(func) & METH_NOARGS)) { + return __Pyx_PyObject_CallMethO(func, NULL); + } + } + return __Pyx_PyObject_Call(func, __pyx_empty_tuple, NULL); +} +#endif + +/* BufferIndexError */ + static void __Pyx_RaiseBufferIndexError(int axis) { + PyErr_Format(PyExc_IndexError, + "Out of bounds on buffer access (axis %d)", axis); +} + +/* SliceObject */ + static CYTHON_INLINE PyObject* __Pyx_PyObject_GetSlice(PyObject* obj, + Py_ssize_t cstart, Py_ssize_t cstop, + PyObject** _py_start, PyObject** _py_stop, PyObject** _py_slice, + int has_cstart, int has_cstop, CYTHON_UNUSED int wraparound) { +#if CYTHON_COMPILING_IN_CPYTHON + PyMappingMethods* mp; +#if PY_MAJOR_VERSION < 3 + PySequenceMethods* ms = Py_TYPE(obj)->tp_as_sequence; + if (likely(ms && ms->sq_slice)) { + if (!has_cstart) { + if (_py_start && (*_py_start != Py_None)) { + cstart = __Pyx_PyIndex_AsSsize_t(*_py_start); + if ((cstart == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; + } else + cstart = 0; + } + if (!has_cstop) { + if (_py_stop && (*_py_stop != Py_None)) { + cstop = __Pyx_PyIndex_AsSsize_t(*_py_stop); + if ((cstop == (Py_ssize_t)-1) && PyErr_Occurred()) goto bad; + } else + cstop = PY_SSIZE_T_MAX; + } + if (wraparound && unlikely((cstart < 0) | (cstop < 0)) && likely(ms->sq_length)) { + Py_ssize_t l = ms->sq_length(obj); + if (likely(l >= 0)) { + if (cstop < 0) { + cstop += l; + if (cstop < 0) cstop = 0; + } + if (cstart < 0) { + cstart += l; + if (cstart < 0) cstart = 0; + } + } else { + if (!PyErr_ExceptionMatches(PyExc_OverflowError)) + goto bad; + PyErr_Clear(); + } + } + return ms->sq_slice(obj, cstart, cstop); + } +#endif + mp = Py_TYPE(obj)->tp_as_mapping; + if (likely(mp && mp->mp_subscript)) +#endif + { + PyObject* result; + PyObject *py_slice, *py_start, *py_stop; + if (_py_slice) { + py_slice = *_py_slice; + } else { + PyObject* owned_start = NULL; + PyObject* owned_stop = NULL; + if (_py_start) { + py_start = *_py_start; + } else { + if (has_cstart) { + owned_start = py_start = PyInt_FromSsize_t(cstart); + if (unlikely(!py_start)) 
goto bad; + } else + py_start = Py_None; + } + if (_py_stop) { + py_stop = *_py_stop; + } else { + if (has_cstop) { + owned_stop = py_stop = PyInt_FromSsize_t(cstop); + if (unlikely(!py_stop)) { + Py_XDECREF(owned_start); + goto bad; + } + } else + py_stop = Py_None; + } + py_slice = PySlice_New(py_start, py_stop, Py_None); + Py_XDECREF(owned_start); + Py_XDECREF(owned_stop); + if (unlikely(!py_slice)) goto bad; + } +#if CYTHON_COMPILING_IN_CPYTHON + result = mp->mp_subscript(obj, py_slice); +#else + result = PyObject_GetItem(obj, py_slice); +#endif + if (!_py_slice) { + Py_DECREF(py_slice); + } + return result; + } + PyErr_Format(PyExc_TypeError, + "'%.200s' object is unsliceable", Py_TYPE(obj)->tp_name); +bad: + return NULL; +} + +/* BufferFallbackError */ + static void __Pyx_RaiseBufferFallbackError(void) { + PyErr_SetString(PyExc_ValueError, + "Buffer acquisition failed on assignment; and then reacquiring the old buffer failed too!"); +} + +/* PyErrFetchRestore */ + #if CYTHON_COMPILING_IN_CPYTHON +static CYTHON_INLINE void __Pyx_ErrRestoreInState(PyThreadState *tstate, PyObject *type, PyObject *value, PyObject *tb) { + PyObject *tmp_type, *tmp_value, *tmp_tb; + tmp_type = tstate->curexc_type; + tmp_value = tstate->curexc_value; + tmp_tb = tstate->curexc_traceback; + tstate->curexc_type = type; + tstate->curexc_value = value; + tstate->curexc_traceback = tb; + Py_XDECREF(tmp_type); + Py_XDECREF(tmp_value); + Py_XDECREF(tmp_tb); +} +static CYTHON_INLINE void __Pyx_ErrFetchInState(PyThreadState *tstate, PyObject **type, PyObject **value, PyObject **tb) { + *type = tstate->curexc_type; + *value = tstate->curexc_value; + *tb = tstate->curexc_traceback; + tstate->curexc_type = 0; + tstate->curexc_value = 0; + tstate->curexc_traceback = 0; +} +#endif + +/* RaiseException */ + #if PY_MAJOR_VERSION < 3 +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, + CYTHON_UNUSED PyObject *cause) { + __Pyx_PyThreadState_declare + Py_XINCREF(type); + if (!value || value == Py_None) + value = NULL; + else + Py_INCREF(value); + if (!tb || tb == Py_None) + tb = NULL; + else { + Py_INCREF(tb); + if (!PyTraceBack_Check(tb)) { + PyErr_SetString(PyExc_TypeError, + "raise: arg 3 must be a traceback or None"); + goto raise_error; + } + } + if (PyType_Check(type)) { +#if CYTHON_COMPILING_IN_PYPY + if (!value) { + Py_INCREF(Py_None); + value = Py_None; + } +#endif + PyErr_NormalizeException(&type, &value, &tb); + } else { + if (value) { + PyErr_SetString(PyExc_TypeError, + "instance exception may not have a separate value"); + goto raise_error; + } + value = type; + type = (PyObject*) Py_TYPE(type); + Py_INCREF(type); + if (!PyType_IsSubtype((PyTypeObject *)type, (PyTypeObject *)PyExc_BaseException)) { + PyErr_SetString(PyExc_TypeError, + "raise: exception class must be a subclass of BaseException"); + goto raise_error; + } + } + __Pyx_PyThreadState_assign + __Pyx_ErrRestore(type, value, tb); + return; +raise_error: + Py_XDECREF(value); + Py_XDECREF(type); + Py_XDECREF(tb); + return; +} +#else +static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) { + PyObject* owned_instance = NULL; + if (tb == Py_None) { + tb = 0; + } else if (tb && !PyTraceBack_Check(tb)) { + PyErr_SetString(PyExc_TypeError, + "raise: arg 3 must be a traceback or None"); + goto bad; + } + if (value == Py_None) + value = 0; + if (PyExceptionInstance_Check(type)) { + if (value) { + PyErr_SetString(PyExc_TypeError, + "instance exception may not have a separate value"); + goto bad; + } + value = 
type; + type = (PyObject*) Py_TYPE(value); + } else if (PyExceptionClass_Check(type)) { + PyObject *instance_class = NULL; + if (value && PyExceptionInstance_Check(value)) { + instance_class = (PyObject*) Py_TYPE(value); + if (instance_class != type) { + int is_subclass = PyObject_IsSubclass(instance_class, type); + if (!is_subclass) { + instance_class = NULL; + } else if (unlikely(is_subclass == -1)) { + goto bad; + } else { + type = instance_class; + } + } + } + if (!instance_class) { + PyObject *args; + if (!value) + args = PyTuple_New(0); + else if (PyTuple_Check(value)) { + Py_INCREF(value); + args = value; + } else + args = PyTuple_Pack(1, value); + if (!args) + goto bad; + owned_instance = PyObject_Call(type, args, NULL); + Py_DECREF(args); + if (!owned_instance) + goto bad; + value = owned_instance; + if (!PyExceptionInstance_Check(value)) { + PyErr_Format(PyExc_TypeError, + "calling %R should have returned an instance of " + "BaseException, not %R", + type, Py_TYPE(value)); + goto bad; + } + } + } else { + PyErr_SetString(PyExc_TypeError, + "raise: exception class must be a subclass of BaseException"); + goto bad; + } +#if PY_VERSION_HEX >= 0x03030000 + if (cause) { +#else + if (cause && cause != Py_None) { +#endif + PyObject *fixed_cause; + if (cause == Py_None) { + fixed_cause = NULL; + } else if (PyExceptionClass_Check(cause)) { + fixed_cause = PyObject_CallObject(cause, NULL); + if (fixed_cause == NULL) + goto bad; + } else if (PyExceptionInstance_Check(cause)) { + fixed_cause = cause; + Py_INCREF(fixed_cause); + } else { + PyErr_SetString(PyExc_TypeError, + "exception causes must derive from " + "BaseException"); + goto bad; + } + PyException_SetCause(value, fixed_cause); + } + PyErr_SetObject(type, value); + if (tb) { +#if CYTHON_COMPILING_IN_PYPY + PyObject *tmp_type, *tmp_value, *tmp_tb; + PyErr_Fetch(&tmp_type, &tmp_value, &tmp_tb); + Py_INCREF(tb); + PyErr_Restore(tmp_type, tmp_value, tb); + Py_XDECREF(tmp_tb); +#else + PyThreadState *tstate = PyThreadState_GET(); + PyObject* tmp_tb = tstate->curexc_traceback; + if (tb != tmp_tb) { + Py_INCREF(tb); + tstate->curexc_traceback = tb; + Py_XDECREF(tmp_tb); + } +#endif + } +bad: + Py_XDECREF(owned_instance); + return; +} +#endif + +/* RaiseTooManyValuesToUnpack */ + static CYTHON_INLINE void __Pyx_RaiseTooManyValuesError(Py_ssize_t expected) { + PyErr_Format(PyExc_ValueError, + "too many values to unpack (expected %" CYTHON_FORMAT_SSIZE_T "d)", expected); +} + +/* RaiseNeedMoreValuesToUnpack */ + static CYTHON_INLINE void __Pyx_RaiseNeedMoreValuesError(Py_ssize_t index) { + PyErr_Format(PyExc_ValueError, + "need more than %" CYTHON_FORMAT_SSIZE_T "d value%.1s to unpack", + index, (index == 1) ? 
"" : "s"); +} + +/* RaiseNoneIterError */ + static CYTHON_INLINE void __Pyx_RaiseNoneNotIterableError(void) { + PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); +} + +/* Import */ + static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, int level) { + PyObject *empty_list = 0; + PyObject *module = 0; + PyObject *global_dict = 0; + PyObject *empty_dict = 0; + PyObject *list; + #if PY_VERSION_HEX < 0x03030000 + PyObject *py_import; + py_import = __Pyx_PyObject_GetAttrStr(__pyx_b, __pyx_n_s_import); + if (!py_import) + goto bad; + #endif + if (from_list) + list = from_list; + else { + empty_list = PyList_New(0); + if (!empty_list) + goto bad; + list = empty_list; + } + global_dict = PyModule_GetDict(__pyx_m); + if (!global_dict) + goto bad; + empty_dict = PyDict_New(); + if (!empty_dict) + goto bad; + { + #if PY_MAJOR_VERSION >= 3 + if (level == -1) { + if (strchr(__Pyx_MODULE_NAME, '.')) { + #if PY_VERSION_HEX < 0x03030000 + PyObject *py_level = PyInt_FromLong(1); + if (!py_level) + goto bad; + module = PyObject_CallFunctionObjArgs(py_import, + name, global_dict, empty_dict, list, py_level, NULL); + Py_DECREF(py_level); + #else + module = PyImport_ImportModuleLevelObject( + name, global_dict, empty_dict, list, 1); + #endif + if (!module) { + if (!PyErr_ExceptionMatches(PyExc_ImportError)) + goto bad; + PyErr_Clear(); + } + } + level = 0; + } + #endif + if (!module) { + #if PY_VERSION_HEX < 0x03030000 + PyObject *py_level = PyInt_FromLong(level); + if (!py_level) + goto bad; + module = PyObject_CallFunctionObjArgs(py_import, + name, global_dict, empty_dict, list, py_level, NULL); + Py_DECREF(py_level); + #else + module = PyImport_ImportModuleLevelObject( + name, global_dict, empty_dict, list, level); + #endif + } + } +bad: + #if PY_VERSION_HEX < 0x03030000 + Py_XDECREF(py_import); + #endif + Py_XDECREF(empty_list); + Py_XDECREF(empty_dict); + return module; +} + +/* CodeObjectCache */ + static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) { + int start = 0, mid = 0, end = count - 1; + if (end >= 0 && code_line > entries[end].code_line) { + return count; + } + while (start < end) { + mid = start + (end - start) / 2; + if (code_line < entries[mid].code_line) { + end = mid; + } else if (code_line > entries[mid].code_line) { + start = mid + 1; + } else { + return mid; + } + } + if (code_line <= entries[mid].code_line) { + return mid; + } else { + return mid + 1; + } +} +static PyCodeObject *__pyx_find_code_object(int code_line) { + PyCodeObject* code_object; + int pos; + if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) { + return NULL; + } + pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); + if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) { + return NULL; + } + code_object = __pyx_code_cache.entries[pos].code_object; + Py_INCREF(code_object); + return code_object; +} +static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) { + int pos, i; + __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries; + if (unlikely(!code_line)) { + return; + } + if (unlikely(!entries)) { + entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry)); + if (likely(entries)) { + __pyx_code_cache.entries = entries; + __pyx_code_cache.max_count = 64; + __pyx_code_cache.count = 1; + entries[0].code_line = code_line; + entries[0].code_object = code_object; + 
Py_INCREF(code_object); + } + return; + } + pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); + if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) { + PyCodeObject* tmp = entries[pos].code_object; + entries[pos].code_object = code_object; + Py_DECREF(tmp); + return; + } + if (__pyx_code_cache.count == __pyx_code_cache.max_count) { + int new_max = __pyx_code_cache.max_count + 64; + entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc( + __pyx_code_cache.entries, (size_t)new_max*sizeof(__Pyx_CodeObjectCacheEntry)); + if (unlikely(!entries)) { + return; + } + __pyx_code_cache.entries = entries; + __pyx_code_cache.max_count = new_max; + } + for (i=__pyx_code_cache.count; i>pos; i--) { + entries[i] = entries[i-1]; + } + entries[pos].code_line = code_line; + entries[pos].code_object = code_object; + __pyx_code_cache.count++; + Py_INCREF(code_object); +} + +/* AddTraceback */ + #include "compile.h" +#include "frameobject.h" +#include "traceback.h" +static PyCodeObject* __Pyx_CreateCodeObjectForTraceback( + const char *funcname, int c_line, + int py_line, const char *filename) { + PyCodeObject *py_code = 0; + PyObject *py_srcfile = 0; + PyObject *py_funcname = 0; + #if PY_MAJOR_VERSION < 3 + py_srcfile = PyString_FromString(filename); + #else + py_srcfile = PyUnicode_FromString(filename); + #endif + if (!py_srcfile) goto bad; + if (c_line) { + #if PY_MAJOR_VERSION < 3 + py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); + #else + py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); + #endif + } + else { + #if PY_MAJOR_VERSION < 3 + py_funcname = PyString_FromString(funcname); + #else + py_funcname = PyUnicode_FromString(funcname); + #endif + } + if (!py_funcname) goto bad; + py_code = __Pyx_PyCode_New( + 0, + 0, + 0, + 0, + 0, + __pyx_empty_bytes, /*PyObject *code,*/ + __pyx_empty_tuple, /*PyObject *consts,*/ + __pyx_empty_tuple, /*PyObject *names,*/ + __pyx_empty_tuple, /*PyObject *varnames,*/ + __pyx_empty_tuple, /*PyObject *freevars,*/ + __pyx_empty_tuple, /*PyObject *cellvars,*/ + py_srcfile, /*PyObject *filename,*/ + py_funcname, /*PyObject *name,*/ + py_line, + __pyx_empty_bytes /*PyObject *lnotab*/ + ); + Py_DECREF(py_srcfile); + Py_DECREF(py_funcname); + return py_code; +bad: + Py_XDECREF(py_srcfile); + Py_XDECREF(py_funcname); + return NULL; +} +static void __Pyx_AddTraceback(const char *funcname, int c_line, + int py_line, const char *filename) { + PyCodeObject *py_code = 0; + PyFrameObject *py_frame = 0; + py_code = __pyx_find_code_object(c_line ? c_line : py_line); + if (!py_code) { + py_code = __Pyx_CreateCodeObjectForTraceback( + funcname, c_line, py_line, filename); + if (!py_code) goto bad; + __pyx_insert_code_object(c_line ? 
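/* Illustrative aside: __pyx_find_code_object / __pyx_insert_code_object above
 * keep the traceback cache sorted by code_line so both lookup and insert cost
 * O(log n). The search is an ordinary lower bound; a standalone sketch of the
 * same idea, minus the cache bookkeeping:
 */
#include <stdio.h>

static int lower_bound(const int *lines, int count, int line)
{
    int start = 0, end = count;
    while (start < end) {
        int mid = start + (end - start) / 2;  /* overflow-safe midpoint */
        if (lines[mid] < line) start = mid + 1;
        else end = mid;
    }
    return start;  /* insertion index that keeps the array sorted */
}

int main(void)
{
    int lines[] = {3, 8, 15, 42};
    printf("insert position for 16: %d\n", lower_bound(lines, 4, 16)); /* 3 */
    return 0;
}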
c_line : py_line, py_code); + } + py_frame = PyFrame_New( + PyThreadState_GET(), /*PyThreadState *tstate,*/ + py_code, /*PyCodeObject *code,*/ + __pyx_d, /*PyObject *globals,*/ + 0 /*PyObject *locals*/ + ); + if (!py_frame) goto bad; + py_frame->f_lineno = py_line; + PyTraceBack_Here(py_frame); +bad: + Py_XDECREF(py_code); + Py_XDECREF(py_frame); +} + +#if PY_MAJOR_VERSION < 3 +static int __Pyx_GetBuffer(PyObject *obj, Py_buffer *view, int flags) { + if (PyObject_CheckBuffer(obj)) return PyObject_GetBuffer(obj, view, flags); + if (PyObject_TypeCheck(obj, __pyx_ptype_5numpy_ndarray)) return __pyx_pw_5numpy_7ndarray_1__getbuffer__(obj, view, flags); + PyErr_Format(PyExc_TypeError, "'%.200s' does not have the buffer interface", Py_TYPE(obj)->tp_name); + return -1; +} +static void __Pyx_ReleaseBuffer(Py_buffer *view) { + PyObject *obj = view->obj; + if (!obj) return; + if (PyObject_CheckBuffer(obj)) { + PyBuffer_Release(view); + return; + } + if (PyObject_TypeCheck(obj, __pyx_ptype_5numpy_ndarray)) { __pyx_pw_5numpy_7ndarray_3__releasebuffer__(obj, view); return; } + Py_DECREF(obj); + view->obj = NULL; +} +#endif + + + /* CIntFromPyVerify */ + #define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value)\ + __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 0) +#define __PYX_VERIFY_RETURN_INT_EXC(target_type, func_type, func_value)\ + __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 1) +#define __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc)\ + {\ + func_type value = func_value;\ + if (sizeof(target_type) < sizeof(func_type)) {\ + if (unlikely(value != (func_type) (target_type) value)) {\ + func_type zero = 0;\ + if (exc && unlikely(value == (func_type)-1 && PyErr_Occurred()))\ + return (target_type) -1;\ + if (is_unsigned && unlikely(value < zero))\ + goto raise_neg_overflow;\ + else\ + goto raise_overflow;\ + }\ + }\ + return (target_type) value;\ + } + +/* CIntToPy */ + static CYTHON_INLINE PyObject* __Pyx_PyInt_From_int(int value) { + const int neg_one = (int) -1, const_zero = (int) 0; + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(int) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(int) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); + } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); + } + } else { + if (sizeof(int) <= sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); + } + } + { + int one = 1; int little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&value; + return _PyLong_FromByteArray(bytes, sizeof(int), + little, !is_unsigned); + } +} + +/* None */ + #if CYTHON_CCOMPLEX + #ifdef __cplusplus + static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { + return ::std::complex< float >(x, y); + } + #else + static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { + return x + y*(__pyx_t_float_complex)_Complex_I; + } + #endif +#else + static CYTHON_INLINE __pyx_t_float_complex __pyx_t_float_complex_from_parts(float x, float y) { + __pyx_t_float_complex z; + z.real = x; + z.imag = y; + return z; + } +#endif + +/* None */ + #if CYTHON_CCOMPLEX +#else + static CYTHON_INLINE int __Pyx_c_eqf(__pyx_t_float_complex a, __pyx_t_float_complex b) { + return (a.real == 
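/* Illustrative aside: the `int one = 1; ... *(unsigned char *)&one` pattern
 * in __Pyx_PyInt_From_int above is a run-time endianness probe, used to drive
 * _PyLong_FromByteArray. Standalone form:
 */
#include <stdio.h>

int main(void)
{
    int one = 1;
    /* First byte of the int 1 is 1 on little-endian, 0 on big-endian. */
    int little = (int)*(unsigned char *)&one;
    printf("little-endian: %s\n", little ? "yes" : "no");
    return 0;
}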
b.real) && (a.imag == b.imag); + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_sumf(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + z.real = a.real + b.real; + z.imag = a.imag + b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_difff(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + z.real = a.real - b.real; + z.imag = a.imag - b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_prodf(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + z.real = a.real * b.real - a.imag * b.imag; + z.imag = a.real * b.imag + a.imag * b.real; + return z; + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_quotf(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + float denom = b.real * b.real + b.imag * b.imag; + z.real = (a.real * b.real + a.imag * b.imag) / denom; + z.imag = (a.imag * b.real - a.real * b.imag) / denom; + return z; + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_negf(__pyx_t_float_complex a) { + __pyx_t_float_complex z; + z.real = -a.real; + z.imag = -a.imag; + return z; + } + static CYTHON_INLINE int __Pyx_c_is_zerof(__pyx_t_float_complex a) { + return (a.real == 0) && (a.imag == 0); + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_conjf(__pyx_t_float_complex a) { + __pyx_t_float_complex z; + z.real = a.real; + z.imag = -a.imag; + return z; + } + #if 1 + static CYTHON_INLINE float __Pyx_c_absf(__pyx_t_float_complex z) { + #if !defined(HAVE_HYPOT) || defined(_MSC_VER) + return sqrtf(z.real*z.real + z.imag*z.imag); + #else + return hypotf(z.real, z.imag); + #endif + } + static CYTHON_INLINE __pyx_t_float_complex __Pyx_c_powf(__pyx_t_float_complex a, __pyx_t_float_complex b) { + __pyx_t_float_complex z; + float r, lnr, theta, z_r, z_theta; + if (b.imag == 0 && b.real == (int)b.real) { + if (b.real < 0) { + float denom = a.real * a.real + a.imag * a.imag; + a.real = a.real / denom; + a.imag = -a.imag / denom; + b.real = -b.real; + } + switch ((int)b.real) { + case 0: + z.real = 1; + z.imag = 0; + return z; + case 1: + return a; + case 2: + z = __Pyx_c_prodf(a, a); + return __Pyx_c_prodf(a, a); + case 3: + z = __Pyx_c_prodf(a, a); + return __Pyx_c_prodf(z, a); + case 4: + z = __Pyx_c_prodf(a, a); + return __Pyx_c_prodf(z, z); + } + } + if (a.imag == 0) { + if (a.real == 0) { + return a; + } + r = a.real; + theta = 0; + } else { + r = __Pyx_c_absf(a); + theta = atan2f(a.imag, a.real); + } + lnr = logf(r); + z_r = expf(lnr * b.real - theta * b.imag); + z_theta = theta * b.real + lnr * b.imag; + z.real = z_r * cosf(z_theta); + z.imag = z_r * sinf(z_theta); + return z; + } + #endif +#endif + +/* None */ + #if CYTHON_CCOMPLEX + #ifdef __cplusplus + static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { + return ::std::complex< double >(x, y); + } + #else + static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { + return x + y*(__pyx_t_double_complex)_Complex_I; + } + #endif +#else + static CYTHON_INLINE __pyx_t_double_complex __pyx_t_double_complex_from_parts(double x, double y) { + __pyx_t_double_complex z; + z.real = x; + z.imag = y; + return z; + } +#endif + +/* None */ + #if CYTHON_CCOMPLEX +#else + static CYTHON_INLINE int __Pyx_c_eq(__pyx_t_double_complex a, __pyx_t_double_complex b) { + return (a.real == b.real) && (a.imag == b.imag); + } + static CYTHON_INLINE __pyx_t_double_complex 
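/* Worked form of the general branch of __Pyx_c_powf above. With
 * a = r*e^(i*theta), r = |a|, theta = atan2(a.imag, a.real):
 *
 *   a**b = e^(b*ln a),  ln a = ln r + i*theta
 *   Re(b*ln a) = b.real*ln r - b.imag*theta   (the code's lnr*b.real - theta*b.imag)
 *   Im(b*ln a) = b.real*theta + b.imag*ln r   (the code's z_theta)
 *
 * so z_r = exp(Re(b*ln a)) and the result is z_r*(cos z_theta + i*sin z_theta),
 * exactly the z_r / z_theta computation in the code. Small integer exponents
 * (0..4) are special-cased as repeated products to skip the transcendental
 * calls entirely.
 */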
__Pyx_c_sum(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + z.real = a.real + b.real; + z.imag = a.imag + b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_diff(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + z.real = a.real - b.real; + z.imag = a.imag - b.imag; + return z; + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_prod(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + z.real = a.real * b.real - a.imag * b.imag; + z.imag = a.real * b.imag + a.imag * b.real; + return z; + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_quot(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + double denom = b.real * b.real + b.imag * b.imag; + z.real = (a.real * b.real + a.imag * b.imag) / denom; + z.imag = (a.imag * b.real - a.real * b.imag) / denom; + return z; + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_neg(__pyx_t_double_complex a) { + __pyx_t_double_complex z; + z.real = -a.real; + z.imag = -a.imag; + return z; + } + static CYTHON_INLINE int __Pyx_c_is_zero(__pyx_t_double_complex a) { + return (a.real == 0) && (a.imag == 0); + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_conj(__pyx_t_double_complex a) { + __pyx_t_double_complex z; + z.real = a.real; + z.imag = -a.imag; + return z; + } + #if 1 + static CYTHON_INLINE double __Pyx_c_abs(__pyx_t_double_complex z) { + #if !defined(HAVE_HYPOT) || defined(_MSC_VER) + return sqrt(z.real*z.real + z.imag*z.imag); + #else + return hypot(z.real, z.imag); + #endif + } + static CYTHON_INLINE __pyx_t_double_complex __Pyx_c_pow(__pyx_t_double_complex a, __pyx_t_double_complex b) { + __pyx_t_double_complex z; + double r, lnr, theta, z_r, z_theta; + if (b.imag == 0 && b.real == (int)b.real) { + if (b.real < 0) { + double denom = a.real * a.real + a.imag * a.imag; + a.real = a.real / denom; + a.imag = -a.imag / denom; + b.real = -b.real; + } + switch ((int)b.real) { + case 0: + z.real = 1; + z.imag = 0; + return z; + case 1: + return a; + case 2: + z = __Pyx_c_prod(a, a); + return __Pyx_c_prod(a, a); + case 3: + z = __Pyx_c_prod(a, a); + return __Pyx_c_prod(z, a); + case 4: + z = __Pyx_c_prod(a, a); + return __Pyx_c_prod(z, z); + } + } + if (a.imag == 0) { + if (a.real == 0) { + return a; + } + r = a.real; + theta = 0; + } else { + r = __Pyx_c_abs(a); + theta = atan2(a.imag, a.real); + } + lnr = log(r); + z_r = exp(lnr * b.real - theta * b.imag); + z_theta = theta * b.real + lnr * b.imag; + z.real = z_r * cos(z_theta); + z.imag = z_r * sin(z_theta); + return z; + } + #endif +#endif + +/* CIntToPy */ + static CYTHON_INLINE PyObject* __Pyx_PyInt_From_enum__NPY_TYPES(enum NPY_TYPES value) { + const enum NPY_TYPES neg_one = (enum NPY_TYPES) -1, const_zero = (enum NPY_TYPES) 0; + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(enum NPY_TYPES) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(enum NPY_TYPES) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); + } else if (sizeof(enum NPY_TYPES) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); + } + } else { + if (sizeof(enum NPY_TYPES) <= sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(enum NPY_TYPES) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); + } + } + { + int one = 1; int little = (int)*(unsigned char 
*)&one; + unsigned char *bytes = (unsigned char *)&value; + return _PyLong_FromByteArray(bytes, sizeof(enum NPY_TYPES), + little, !is_unsigned); + } +} + +/* CIntFromPy */ + static CYTHON_INLINE npy_int32 __Pyx_PyInt_As_npy_int32(PyObject *x) { + const npy_int32 neg_one = (npy_int32) -1, const_zero = (npy_int32) 0; + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if (sizeof(npy_int32) < sizeof(long)) { + __PYX_VERIFY_RETURN_INT(npy_int32, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (npy_int32) val; + } + } else +#endif + if (likely(PyLong_Check(x))) { + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (npy_int32) 0; + case 1: __PYX_VERIFY_RETURN_INT(npy_int32, digit, digits[0]) + case 2: + if (8 * sizeof(npy_int32) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_int32, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_int32) >= 2 * PyLong_SHIFT) { + return (npy_int32) (((((npy_int32)digits[1]) << PyLong_SHIFT) | (npy_int32)digits[0])); + } + } + break; + case 3: + if (8 * sizeof(npy_int32) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_int32, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_int32) >= 3 * PyLong_SHIFT) { + return (npy_int32) (((((((npy_int32)digits[2]) << PyLong_SHIFT) | (npy_int32)digits[1]) << PyLong_SHIFT) | (npy_int32)digits[0])); + } + } + break; + case 4: + if (8 * sizeof(npy_int32) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_int32, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_int32) >= 4 * PyLong_SHIFT) { + return (npy_int32) (((((((((npy_int32)digits[3]) << PyLong_SHIFT) | (npy_int32)digits[2]) << PyLong_SHIFT) | (npy_int32)digits[1]) << PyLong_SHIFT) | (npy_int32)digits[0])); + } + } + break; + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (npy_int32) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if (sizeof(npy_int32) <= sizeof(unsigned long)) { + __PYX_VERIFY_RETURN_INT_EXC(npy_int32, unsigned long, PyLong_AsUnsignedLong(x)) + } else if (sizeof(npy_int32) <= sizeof(unsigned PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(npy_int32, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (npy_int32) 0; + case -1: __PYX_VERIFY_RETURN_INT(npy_int32, sdigit, (sdigit) (-(sdigit)digits[0])) + case 1: __PYX_VERIFY_RETURN_INT(npy_int32, digit, +digits[0]) + case -2: + if (8 * sizeof(npy_int32) - 1 > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_int32, long, -(long) (((((unsigned long)digits[1]) << 
PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_int32) - 1 > 2 * PyLong_SHIFT) { + return (npy_int32) (((npy_int32)-1)*(((((npy_int32)digits[1]) << PyLong_SHIFT) | (npy_int32)digits[0]))); + } + } + break; + case 2: + if (8 * sizeof(npy_int32) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_int32, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_int32) - 1 > 2 * PyLong_SHIFT) { + return (npy_int32) ((((((npy_int32)digits[1]) << PyLong_SHIFT) | (npy_int32)digits[0]))); + } + } + break; + case -3: + if (8 * sizeof(npy_int32) - 1 > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_int32, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_int32) - 1 > 3 * PyLong_SHIFT) { + return (npy_int32) (((npy_int32)-1)*(((((((npy_int32)digits[2]) << PyLong_SHIFT) | (npy_int32)digits[1]) << PyLong_SHIFT) | (npy_int32)digits[0]))); + } + } + break; + case 3: + if (8 * sizeof(npy_int32) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_int32, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_int32) - 1 > 3 * PyLong_SHIFT) { + return (npy_int32) ((((((((npy_int32)digits[2]) << PyLong_SHIFT) | (npy_int32)digits[1]) << PyLong_SHIFT) | (npy_int32)digits[0]))); + } + } + break; + case -4: + if (8 * sizeof(npy_int32) - 1 > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_int32, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_int32) - 1 > 4 * PyLong_SHIFT) { + return (npy_int32) (((npy_int32)-1)*(((((((((npy_int32)digits[3]) << PyLong_SHIFT) | (npy_int32)digits[2]) << PyLong_SHIFT) | (npy_int32)digits[1]) << PyLong_SHIFT) | (npy_int32)digits[0]))); + } + } + break; + case 4: + if (8 * sizeof(npy_int32) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(npy_int32, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(npy_int32) - 1 > 4 * PyLong_SHIFT) { + return (npy_int32) ((((((((((npy_int32)digits[3]) << PyLong_SHIFT) | (npy_int32)digits[2]) << PyLong_SHIFT) | (npy_int32)digits[1]) << PyLong_SHIFT) | (npy_int32)digits[0]))); + } + } + break; + } +#endif + if (sizeof(npy_int32) <= sizeof(long)) { + __PYX_VERIFY_RETURN_INT_EXC(npy_int32, long, PyLong_AsLong(x)) + } else if (sizeof(npy_int32) <= sizeof(PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(npy_int32, PY_LONG_LONG, PyLong_AsLongLong(x)) + } + } + { +#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) + PyErr_SetString(PyExc_RuntimeError, + "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); +#else + npy_int32 val; + PyObject *v = __Pyx_PyNumber_IntOrLong(x); + #if PY_MAJOR_VERSION < 3 + if (likely(v) && !PyLong_Check(v)) { + PyObject *tmp = v; + v = PyNumber_Long(tmp); + Py_DECREF(tmp); + } + #endif + if (likely(v)) { 
+ int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + int ret = _PyLong_AsByteArray((PyLongObject *)v, + bytes, sizeof(val), + is_little, !is_unsigned); + Py_DECREF(v); + if (likely(!ret)) + return val; + } +#endif + return (npy_int32) -1; + } + } else { + npy_int32 val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (npy_int32) -1; + val = __Pyx_PyInt_As_npy_int32(tmp); + Py_DECREF(tmp); + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to npy_int32"); + return (npy_int32) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to npy_int32"); + return (npy_int32) -1; +} + +/* CIntFromPy */ + static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) { + const int neg_one = (int) -1, const_zero = (int) 0; + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if (sizeof(int) < sizeof(long)) { + __PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (int) val; + } + } else +#endif + if (likely(PyLong_Check(x))) { + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (int) 0; + case 1: __PYX_VERIFY_RETURN_INT(int, digit, digits[0]) + case 2: + if (8 * sizeof(int) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) >= 2 * PyLong_SHIFT) { + return (int) (((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + case 3: + if (8 * sizeof(int) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) >= 3 * PyLong_SHIFT) { + return (int) (((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + case 4: + if (8 * sizeof(int) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) >= 4 * PyLong_SHIFT) { + return (int) (((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); + } + } + break; + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (int) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if (sizeof(int) <= sizeof(unsigned long)) { + __PYX_VERIFY_RETURN_INT_EXC(int, unsigned long, PyLong_AsUnsignedLong(x)) + } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 
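/* Illustrative aside on the digit fast paths in __Pyx_PyInt_As_npy_int32
 * above: in the CPython layout this code targets (through 3.11), a long
 * stores its sign in Py_SIZE() and its magnitude as base-2**PyLong_SHIFT
 * digits in ob_digit[], least significant first, so small values can be
 * reassembled with shifts instead of a PyLong_AsLong call. A sketch of the
 * two-digit case (assumes Py_SIZE(x) == 2, as in the `case 2:` branch):
 */
#include <Python.h>
#include <longintrepr.h>  /* PyLongObject internals; pre-3.12 layout assumed */

static unsigned long two_digit_value(PyObject *x)
{
    const digit *digits = ((PyLongObject *)x)->ob_digit;
    return ((unsigned long)digits[1] << PyLong_SHIFT) | (unsigned long)digits[0];
}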
0: return (int) 0; + case -1: __PYX_VERIFY_RETURN_INT(int, sdigit, (sdigit) (-(sdigit)digits[0])) + case 1: __PYX_VERIFY_RETURN_INT(int, digit, +digits[0]) + case -2: + if (8 * sizeof(int) - 1 > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) { + return (int) (((int)-1)*(((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 2: + if (8 * sizeof(int) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) { + return (int) ((((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case -3: + if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { + return (int) (((int)-1)*(((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 3: + if (8 * sizeof(int) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { + return (int) ((((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case -4: + if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 4 * PyLong_SHIFT) { + return (int) (((int)-1)*(((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + case 4: + if (8 * sizeof(int) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(int) - 1 > 4 * PyLong_SHIFT) { + return (int) ((((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); + } + } + break; + } +#endif + if (sizeof(int) <= sizeof(long)) { + __PYX_VERIFY_RETURN_INT_EXC(int, long, PyLong_AsLong(x)) + } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(int, PY_LONG_LONG, PyLong_AsLongLong(x)) + } + } + { +#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) + PyErr_SetString(PyExc_RuntimeError, + "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); +#else + int val; + PyObject *v = __Pyx_PyNumber_IntOrLong(x); + #if PY_MAJOR_VERSION < 3 + if (likely(v) && !PyLong_Check(v)) { + PyObject *tmp = v; + v = 
PyNumber_Long(tmp); + Py_DECREF(tmp); + } + #endif + if (likely(v)) { + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + int ret = _PyLong_AsByteArray((PyLongObject *)v, + bytes, sizeof(val), + is_little, !is_unsigned); + Py_DECREF(v); + if (likely(!ret)) + return val; + } +#endif + return (int) -1; + } + } else { + int val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (int) -1; + val = __Pyx_PyInt_As_int(tmp); + Py_DECREF(tmp); + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to int"); + return (int) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to int"); + return (int) -1; +} + +/* CIntToPy */ + static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) { + const long neg_one = (long) -1, const_zero = (long) 0; + const int is_unsigned = neg_one > const_zero; + if (is_unsigned) { + if (sizeof(long) < sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(long) <= sizeof(unsigned long)) { + return PyLong_FromUnsignedLong((unsigned long) value); + } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { + return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); + } + } else { + if (sizeof(long) <= sizeof(long)) { + return PyInt_FromLong((long) value); + } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) { + return PyLong_FromLongLong((PY_LONG_LONG) value); + } + } + { + int one = 1; int little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&value; + return _PyLong_FromByteArray(bytes, sizeof(long), + little, !is_unsigned); + } +} + +/* CIntFromPy */ + static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) { + const long neg_one = (long) -1, const_zero = (long) 0; + const int is_unsigned = neg_one > const_zero; +#if PY_MAJOR_VERSION < 3 + if (likely(PyInt_Check(x))) { + if (sizeof(long) < sizeof(long)) { + __PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG(x)) + } else { + long val = PyInt_AS_LONG(x); + if (is_unsigned && unlikely(val < 0)) { + goto raise_neg_overflow; + } + return (long) val; + } + } else +#endif + if (likely(PyLong_Check(x))) { + if (is_unsigned) { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (long) 0; + case 1: __PYX_VERIFY_RETURN_INT(long, digit, digits[0]) + case 2: + if (8 * sizeof(long) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) >= 2 * PyLong_SHIFT) { + return (long) (((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + case 3: + if (8 * sizeof(long) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) >= 3 * PyLong_SHIFT) { + return (long) (((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + case 4: + if (8 * sizeof(long) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned 
long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) >= 4 * PyLong_SHIFT) { + return (long) (((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); + } + } + break; + } +#endif +#if CYTHON_COMPILING_IN_CPYTHON + if (unlikely(Py_SIZE(x) < 0)) { + goto raise_neg_overflow; + } +#else + { + int result = PyObject_RichCompareBool(x, Py_False, Py_LT); + if (unlikely(result < 0)) + return (long) -1; + if (unlikely(result == 1)) + goto raise_neg_overflow; + } +#endif + if (sizeof(long) <= sizeof(unsigned long)) { + __PYX_VERIFY_RETURN_INT_EXC(long, unsigned long, PyLong_AsUnsignedLong(x)) + } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(long, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) + } + } else { +#if CYTHON_USE_PYLONG_INTERNALS + const digit* digits = ((PyLongObject*)x)->ob_digit; + switch (Py_SIZE(x)) { + case 0: return (long) 0; + case -1: __PYX_VERIFY_RETURN_INT(long, sdigit, (sdigit) (-(sdigit)digits[0])) + case 1: __PYX_VERIFY_RETURN_INT(long, digit, +digits[0]) + case -2: + if (8 * sizeof(long) - 1 > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + return (long) (((long)-1)*(((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 2: + if (8 * sizeof(long) > 1 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + return (long) ((((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case -3: + if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + return (long) (((long)-1)*(((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 3: + if (8 * sizeof(long) > 2 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + return (long) ((((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case -4: + if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { + return (long) (((long)-1)*(((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + case 4: + if (8 * sizeof(long) > 3 * PyLong_SHIFT) { + if (8 * sizeof(unsigned long) > 4 * 
PyLong_SHIFT) { + __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) + } else if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { + return (long) ((((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); + } + } + break; + } +#endif + if (sizeof(long) <= sizeof(long)) { + __PYX_VERIFY_RETURN_INT_EXC(long, long, PyLong_AsLong(x)) + } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) { + __PYX_VERIFY_RETURN_INT_EXC(long, PY_LONG_LONG, PyLong_AsLongLong(x)) + } + } + { +#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) + PyErr_SetString(PyExc_RuntimeError, + "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); +#else + long val; + PyObject *v = __Pyx_PyNumber_IntOrLong(x); + #if PY_MAJOR_VERSION < 3 + if (likely(v) && !PyLong_Check(v)) { + PyObject *tmp = v; + v = PyNumber_Long(tmp); + Py_DECREF(tmp); + } + #endif + if (likely(v)) { + int one = 1; int is_little = (int)*(unsigned char *)&one; + unsigned char *bytes = (unsigned char *)&val; + int ret = _PyLong_AsByteArray((PyLongObject *)v, + bytes, sizeof(val), + is_little, !is_unsigned); + Py_DECREF(v); + if (likely(!ret)) + return val; + } +#endif + return (long) -1; + } + } else { + long val; + PyObject *tmp = __Pyx_PyNumber_IntOrLong(x); + if (!tmp) return (long) -1; + val = __Pyx_PyInt_As_long(tmp); + Py_DECREF(tmp); + return val; + } +raise_overflow: + PyErr_SetString(PyExc_OverflowError, + "value too large to convert to long"); + return (long) -1; +raise_neg_overflow: + PyErr_SetString(PyExc_OverflowError, + "can't convert negative value to long"); + return (long) -1; +} + +/* CheckBinaryVersion */ + static int __Pyx_check_binary_version(void) { + char ctversion[4], rtversion[4]; + PyOS_snprintf(ctversion, 4, "%d.%d", PY_MAJOR_VERSION, PY_MINOR_VERSION); + PyOS_snprintf(rtversion, 4, "%s", Py_GetVersion()); + if (ctversion[0] != rtversion[0] || ctversion[2] != rtversion[2]) { + char message[200]; + PyOS_snprintf(message, sizeof(message), + "compiletime version %s of module '%.100s' " + "does not match runtime version %s", + ctversion, __Pyx_MODULE_NAME, rtversion); + return PyErr_WarnEx(NULL, message, 1); + } + return 0; +} + +/* ModuleImport */ + #ifndef __PYX_HAVE_RT_ImportModule +#define __PYX_HAVE_RT_ImportModule +static PyObject *__Pyx_ImportModule(const char *name) { + PyObject *py_name = 0; + PyObject *py_module = 0; + py_name = __Pyx_PyIdentifier_FromString(name); + if (!py_name) + goto bad; + py_module = PyImport_Import(py_name); + Py_DECREF(py_name); + return py_module; +bad: + Py_XDECREF(py_name); + return 0; +} +#endif + +/* TypeImport */ + #ifndef __PYX_HAVE_RT_ImportType +#define __PYX_HAVE_RT_ImportType +static PyTypeObject *__Pyx_ImportType(const char *module_name, const char *class_name, + size_t size, int strict) +{ + PyObject *py_module = 0; + PyObject *result = 0; + PyObject *py_name = 0; + char warning[200]; + Py_ssize_t basicsize; +#ifdef Py_LIMITED_API + PyObject *py_basicsize; +#endif + py_module = __Pyx_ImportModule(module_name); + if (!py_module) + goto bad; + py_name = __Pyx_PyIdentifier_FromString(class_name); + if (!py_name) + goto bad; + result = PyObject_GetAttr(py_module, py_name); + Py_DECREF(py_name); + py_name = 0; + Py_DECREF(py_module); + py_module = 0; + if (!result) + goto bad; + if (!PyType_Check(result)) { + 
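+    /* the imported attribute must itself be a type object; anything else
+       means the runtime module no longer matches the compiled layout */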
PyErr_Format(PyExc_TypeError, + "%.200s.%.200s is not a type object", + module_name, class_name); + goto bad; + } +#ifndef Py_LIMITED_API + basicsize = ((PyTypeObject *)result)->tp_basicsize; +#else + py_basicsize = PyObject_GetAttrString(result, "__basicsize__"); + if (!py_basicsize) + goto bad; + basicsize = PyLong_AsSsize_t(py_basicsize); + Py_DECREF(py_basicsize); + py_basicsize = 0; + if (basicsize == (Py_ssize_t)-1 && PyErr_Occurred()) + goto bad; +#endif + if (!strict && (size_t)basicsize > size) { + PyOS_snprintf(warning, sizeof(warning), + "%s.%s size changed, may indicate binary incompatibility. Expected %zd, got %zd", + module_name, class_name, basicsize, size); + if (PyErr_WarnEx(NULL, warning, 0) < 0) goto bad; + } + else if ((size_t)basicsize != size) { + PyErr_Format(PyExc_ValueError, + "%.200s.%.200s has the wrong size, try recompiling. Expected %zd, got %zd", + module_name, class_name, basicsize, size); + goto bad; + } + return (PyTypeObject *)result; +bad: + Py_XDECREF(py_module); + Py_XDECREF(result); + return NULL; +} +#endif + +/* InitStrings */ + static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { + while (t->p) { + #if PY_MAJOR_VERSION < 3 + if (t->is_unicode) { + *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL); + } else if (t->intern) { + *t->p = PyString_InternFromString(t->s); + } else { + *t->p = PyString_FromStringAndSize(t->s, t->n - 1); + } + #else + if (t->is_unicode | t->is_str) { + if (t->intern) { + *t->p = PyUnicode_InternFromString(t->s); + } else if (t->encoding) { + *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL); + } else { + *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1); + } + } else { + *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1); + } + #endif + if (!*t->p) + return -1; + ++t; + } + return 0; +} + +static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) { + return __Pyx_PyUnicode_FromStringAndSize(c_str, (Py_ssize_t)strlen(c_str)); +} +static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject* o) { + Py_ssize_t ignore; + return __Pyx_PyObject_AsStringAndSize(o, &ignore); +} +static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) { +#if CYTHON_COMPILING_IN_CPYTHON && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) + if ( +#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + __Pyx_sys_getdefaultencoding_not_ascii && +#endif + PyUnicode_Check(o)) { +#if PY_VERSION_HEX < 0x03030000 + char* defenc_c; + PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL); + if (!defenc) return NULL; + defenc_c = PyBytes_AS_STRING(defenc); +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + { + char* end = defenc_c + PyBytes_GET_SIZE(defenc); + char* c; + for (c = defenc_c; c < end; c++) { + if ((unsigned char) (*c) >= 128) { + PyUnicode_AsASCIIString(o); + return NULL; + } + } + } +#endif + *length = PyBytes_GET_SIZE(defenc); + return defenc_c; +#else + if (__Pyx_PyUnicode_READY(o) == -1) return NULL; +#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII + if (PyUnicode_IS_ASCII(o)) { + *length = PyUnicode_GET_LENGTH(o); + return PyUnicode_AsUTF8(o); + } else { + PyUnicode_AsASCIIString(o); + return NULL; + } +#else + return PyUnicode_AsUTF8AndSize(o, length); +#endif +#endif + } else +#endif +#if (!CYTHON_COMPILING_IN_PYPY) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE)) + if (PyByteArray_Check(o)) { + *length = PyByteArray_GET_SIZE(o); + return PyByteArray_AS_STRING(o); + } else +#endif + { + char* result; + int r 
= PyBytes_AsStringAndSize(o, &result, length);
+        if (unlikely(r < 0)) {
+            return NULL;
+        } else {
+            return result;
+        }
+    }
+}
+static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {
+   int is_true = x == Py_True;
+   if (is_true | (x == Py_False) | (x == Py_None)) return is_true;
+   else return PyObject_IsTrue(x);
+}
+static CYTHON_INLINE PyObject* __Pyx_PyNumber_IntOrLong(PyObject* x) {
+  PyNumberMethods *m;
+  const char *name = NULL;
+  PyObject *res = NULL;
+#if PY_MAJOR_VERSION < 3
+  if (PyInt_Check(x) || PyLong_Check(x))
+#else
+  if (PyLong_Check(x))
+#endif
+    return __Pyx_NewRef(x);
+  m = Py_TYPE(x)->tp_as_number;
+#if PY_MAJOR_VERSION < 3
+  if (m && m->nb_int) {
+    name = "int";
+    res = PyNumber_Int(x);
+  }
+  else if (m && m->nb_long) {
+    name = "long";
+    res = PyNumber_Long(x);
+  }
+#else
+  if (m && m->nb_int) {
+    name = "int";
+    res = PyNumber_Long(x);
+  }
+#endif
+  if (res) {
+#if PY_MAJOR_VERSION < 3
+    if (!PyInt_Check(res) && !PyLong_Check(res)) {
+#else
+    if (!PyLong_Check(res)) {
+#endif
+      PyErr_Format(PyExc_TypeError,
+                   "__%.4s__ returned non-%.4s (type %.200s)",
+                   name, name, Py_TYPE(res)->tp_name);
+      Py_DECREF(res);
+      return NULL;
+    }
+  }
+  else if (!PyErr_Occurred()) {
+    PyErr_SetString(PyExc_TypeError,
+                    "an integer is required");
+  }
+  return res;
+}
+static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {
+  Py_ssize_t ival;
+  PyObject *x;
+#if PY_MAJOR_VERSION < 3
+  if (likely(PyInt_CheckExact(b))) {
+    if (sizeof(Py_ssize_t) >= sizeof(long))
+        return PyInt_AS_LONG(b);
+    else
+        return PyInt_AsSsize_t(b);
+  }
+#endif
+  if (likely(PyLong_CheckExact(b))) {
+  #if CYTHON_USE_PYLONG_INTERNALS
+    const digit* digits = ((PyLongObject*)b)->ob_digit;
+    const Py_ssize_t size = Py_SIZE(b);
+    if (likely(__Pyx_sst_abs(size) <= 1)) {
+      ival = likely(size) ?
digits[0] : 0; + if (size == -1) ival = -ival; + return ival; + } else { + switch (size) { + case 2: + if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { + return (Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -2: + if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case 3: + if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { + return (Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -3: + if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case 4: + if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { + return (Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + case -4: + if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { + return -(Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); + } + break; + } + } + #endif + return PyLong_AsSsize_t(b); + } + x = PyNumber_Index(b); + if (!x) return -1; + ival = PyInt_AsSsize_t(x); + Py_DECREF(x); + return ival; +} +static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) { + return PyInt_FromSize_t(ival); +} + + +#endif /* Py_PYTHON_H */ + diff --git a/lib/nms/gpu_nms.hpp b/lib/nms/gpu_nms.hpp new file mode 100644 index 0000000..68b6d42 --- /dev/null +++ b/lib/nms/gpu_nms.hpp @@ -0,0 +1,2 @@ +void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, + int boxes_dim, float nms_overlap_thresh, int device_id); diff --git a/lib/nms/gpu_nms.pyx b/lib/nms/gpu_nms.pyx new file mode 100644 index 0000000..e637d3c --- /dev/null +++ b/lib/nms/gpu_nms.pyx @@ -0,0 +1,34 @@ +# ------------------------------------------------------------------------------ +# Copyright (c) Microsoft +# Licensed under the MIT License. +# Written by Bin Xiao (Bin.Xiao@microsoft.com) +# ------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +cimport numpy as np + +assert sizeof(int) == sizeof(np.int32_t) + +cdef extern from "gpu_nms.hpp": + void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) + +def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, + np.int32_t device_id=0): + cdef int boxes_num = dets.shape[0] + cdef int boxes_dim = dets.shape[1] + cdef int num_out + cdef np.ndarray[np.int32_t, ndim=1] \ + keep = np.zeros(boxes_num, dtype=np.int32) + cdef np.ndarray[np.float32_t, ndim=1] \ + scores = dets[:, 4] + cdef np.ndarray[np.int32_t, ndim=1] \ + order = scores.argsort()[::-1].astype(np.int32) + cdef np.ndarray[np.float32_t, ndim=2] \ + sorted_dets = dets[order, :] + _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) + keep = keep[:num_out] + return list(order[keep]) diff --git a/lib/nms/nms.py b/lib/nms/nms.py new file mode 100644 index 0000000..7f83e05 --- /dev/null +++ b/lib/nms/nms.py @@ -0,0 +1,180 @@ +# ------------------------------------------------------------------------------ +# Copyright (c) Microsoft +# Licensed under the MIT License. 
+# Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
+# ------------------------------------------------------------------------------
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from .cpu_nms import cpu_nms
+from .gpu_nms import gpu_nms
+
+
+def py_nms_wrapper(thresh):
+    def _nms(dets):
+        return nms(dets, thresh)
+    return _nms
+
+
+def cpu_nms_wrapper(thresh):
+    def _nms(dets):
+        return cpu_nms(dets, thresh)
+    return _nms
+
+
+def gpu_nms_wrapper(thresh, device_id):
+    def _nms(dets):
+        return gpu_nms(dets, thresh, device_id)
+    return _nms
+
+
+def nms(dets, thresh):
+    """
+    greedily select boxes with high confidence and overlap with current maximum <= thresh
+    rule out overlap >= thresh
+    :param dets: [[x1, y1, x2, y2, score]]
+    :param thresh: retain overlap < thresh
+    :return: indexes to keep
+    """
+    if dets.shape[0] == 0:
+        return []
+
+    x1 = dets[:, 0]
+    y1 = dets[:, 1]
+    x2 = dets[:, 2]
+    y2 = dets[:, 3]
+    scores = dets[:, 4]
+
+    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+    order = scores.argsort()[::-1]
+
+    keep = []
+    while order.size > 0:
+        i = order[0]
+        keep.append(i)
+        xx1 = np.maximum(x1[i], x1[order[1:]])
+        yy1 = np.maximum(y1[i], y1[order[1:]])
+        xx2 = np.minimum(x2[i], x2[order[1:]])
+        yy2 = np.minimum(y2[i], y2[order[1:]])
+
+        w = np.maximum(0.0, xx2 - xx1 + 1)
+        h = np.maximum(0.0, yy2 - yy1 + 1)
+        inter = w * h
+        ovr = inter / (areas[i] + areas[order[1:]] - inter)
+
+        inds = np.where(ovr <= thresh)[0]
+        order = order[inds + 1]
+
+    return keep
+
+
+def oks_iou(g, d, a_g, a_d, sigmas=None, in_vis_thre=None):
+    if not isinstance(sigmas, np.ndarray):
+        sigmas = np.array([.26, .25, .25, .35, .35, .79, .79, .72, .72, .62, .62, 1.07, 1.07, .87, .87, .89, .89]) / 10.0
+    vars = (sigmas * 2) ** 2
+    xg = g[0::3]
+    yg = g[1::3]
+    vg = g[2::3]
+    ious = np.zeros((d.shape[0]))
+    for n_d in range(0, d.shape[0]):
+        xd = d[n_d, 0::3]
+        yd = d[n_d, 1::3]
+        vd = d[n_d, 2::3]
+        dx = xd - xg
+        dy = yd - yg
+        e = (dx ** 2 + dy ** 2) / vars / ((a_g + a_d[n_d]) / 2 + np.spacing(1)) / 2
+        if in_vis_thre is not None:
+            # elementwise AND: only keep keypoints visible in both poses
+            # (a bare `and` between two lists would just return the second list)
+            ind = np.logical_and(vg > in_vis_thre, vd > in_vis_thre)
+            e = e[ind]
+        ious[n_d] = np.sum(np.exp(-e)) / e.shape[0] if e.shape[0] != 0 else 0.0
+    return ious
+
+
+def oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None):
+    """
+    greedily select poses with high confidence and overlap with current maximum <= thresh
+    rule out overlap >= thresh, overlap = oks
+    :param kpts_db: list of dicts, each with 'score', 'keypoints' and 'area' entries
+    :param thresh: retain overlap < thresh
+    :return: indexes to keep
+    """
+    if len(kpts_db) == 0:
+        return []
+
+    scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))])
+    kpts = np.array([kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))])
+    areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))])
+
+    order = scores.argsort()[::-1]
+
+    keep = []
+    while order.size > 0:
+        i = order[0]
+        keep.append(i)
+
+        oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], sigmas, in_vis_thre)
+
+        inds = np.where(oks_ovr <= thresh)[0]
+        order = order[inds + 1]
+
+    return keep
+
+
+def rescore(overlap, scores, thresh, type='gaussian'):
+    assert overlap.shape[0] == scores.shape[0]
+    if type == 'linear':
+        inds = np.where(overlap >= thresh)[0]
+        scores[inds] = scores[inds] * (1 - overlap[inds])
+    else:
+        scores = scores * np.exp(- overlap**2 / thresh)
+
+    return scores
+
+
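+# Worked example for rescore (hypothetical numbers): with thresh=0.5 and the
+# default 'gaussian' mode, an overlap of 0.8 rescales a score s to
+# s * exp(-0.8**2 / 0.5) ~= 0.28 * s, rather than discarding the pose outright.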
+def soft_oks_nms(kpts_db, thresh, sigmas=None, in_vis_thre=None):
+    """
+    greedily rescore (rather than discard) poses that overlap the current
+    maximum, overlap = oks
+    :param kpts_db: list of dicts, each with 'score', 'keypoints' and 'area' entries
+    :param thresh: rescoring threshold passed through to rescore()
+    :return: indexes to keep
+    """
+    if len(kpts_db) == 0:
+        return []
+
+    scores = np.array([kpts_db[i]['score'] for i in range(len(kpts_db))])
+    kpts = np.array([kpts_db[i]['keypoints'].flatten() for i in range(len(kpts_db))])
+    areas = np.array([kpts_db[i]['area'] for i in range(len(kpts_db))])
+
+    order = scores.argsort()[::-1]
+    scores = scores[order]
+
+    # max_dets = order.size
+    max_dets = 20
+    keep = np.zeros(max_dets, dtype=np.intp)
+    keep_cnt = 0
+    while order.size > 0 and keep_cnt < max_dets:
+        i = order[0]
+
+        oks_ovr = oks_iou(kpts[i], kpts[order[1:]], areas[i], areas[order[1:]], sigmas, in_vis_thre)
+
+        order = order[1:]
+        scores = rescore(oks_ovr, scores[1:], thresh)
+
+        tmp = scores.argsort()[::-1]
+        order = order[tmp]
+        scores = scores[tmp]
+
+        keep[keep_cnt] = i
+        keep_cnt += 1
+
+    keep = keep[:keep_cnt]
+
+    return keep
+    # kpts_db = kpts_db[:keep_cnt]
+
+    # return kpts_db
diff --git a/lib/nms/nms_kernel.cu b/lib/nms/nms_kernel.cu
new file mode 100644
index 0000000..f6176c6
--- /dev/null
+++ b/lib/nms/nms_kernel.cu
@@ -0,0 +1,143 @@
+// ------------------------------------------------------------------
+// Copyright (c) Microsoft
+// Licensed under The MIT License
+// Modified from MATLAB Faster R-CNN (https://github.com/shaoqingren/faster_rcnn)
+// ------------------------------------------------------------------
+
+#include "gpu_nms.hpp"
+#include <vector>
+#include <iostream>
+
+#define CUDA_CHECK(condition) \
+  /* Code block avoids redefinition of cudaError_t error */ \
+  do { \
+    cudaError_t error = condition; \
+    if (error != cudaSuccess) { \
+      std::cout << cudaGetErrorString(error) << std::endl; \
+    } \
+  } while (0)
+
+#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
+int const threadsPerBlock = sizeof(unsigned long long) * 8;
+
+__device__ inline float devIoU(float const * const a, float const * const b) {
+  float left = max(a[0], b[0]), right = min(a[2], b[2]);
+  float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
+  float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
+  float interS = width * height;
+  float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
+  float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
+  return interS / (Sa + Sb - interS);
+}
+
+__global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
+                           const float *dev_boxes, unsigned long long *dev_mask) {
+  const int row_start = blockIdx.y;
+  const int col_start = blockIdx.x;
+
+  // if (row_start > col_start) return;
+
+  const int row_size =
+        min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
+  const int col_size =
+        min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
+
+  __shared__ float block_boxes[threadsPerBlock * 5];
+  if (threadIdx.x < col_size) {
+    block_boxes[threadIdx.x * 5 + 0] =
+        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
+    block_boxes[threadIdx.x * 5 + 1] =
+        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
+    block_boxes[threadIdx.x * 5 + 2] =
+        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
+    block_boxes[threadIdx.x * 5 + 3] =
+        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
+    block_boxes[threadIdx.x * 5 + 4] =
+        dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
+  }
+  __syncthreads();
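+  // Each thread now tests its own box against the (up to 64) boxes cached in
+  // shared memory, packing one suppression bit per comparison into a 64-bit
+  // mask word (threadsPerBlock == 8 * sizeof(unsigned long long) == 64).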
+  if (threadIdx.x < row_size) {
+    const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
+    const float *cur_box = dev_boxes + cur_box_idx * 5;
+    int i = 0;
+    unsigned long long t = 0;
+    int start = 0;
+    if (row_start == col_start) {
+      start = threadIdx.x + 1;
+    }
+    for (i = start; i < col_size; i++) {
+      if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
+        t |= 1ULL << i;
+      }
+    }
+    const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
+    dev_mask[cur_box_idx * col_blocks + col_start] = t;
+  }
+}
+
+void _set_device(int device_id) {
+  int current_device;
+  CUDA_CHECK(cudaGetDevice(&current_device));
+  if (current_device == device_id) {
+    return;
+  }
+  // The call to cudaSetDevice must come before any calls to Get, which
+  // may perform initialization using the GPU.
+  CUDA_CHECK(cudaSetDevice(device_id));
+}
+
+void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
+          int boxes_dim, float nms_overlap_thresh, int device_id) {
+  _set_device(device_id);
+
+  float* boxes_dev = NULL;
+  unsigned long long* mask_dev = NULL;
+
+  const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
+
+  CUDA_CHECK(cudaMalloc(&boxes_dev,
+                        boxes_num * boxes_dim * sizeof(float)));
+  CUDA_CHECK(cudaMemcpy(boxes_dev,
+                        boxes_host,
+                        boxes_num * boxes_dim * sizeof(float),
+                        cudaMemcpyHostToDevice));
+
+  CUDA_CHECK(cudaMalloc(&mask_dev,
+                        boxes_num * col_blocks * sizeof(unsigned long long)));
+
+  dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
+              DIVUP(boxes_num, threadsPerBlock));
+  dim3 threads(threadsPerBlock);
+  nms_kernel<<<blocks, threads>>>(boxes_num,
+                                  nms_overlap_thresh,
+                                  boxes_dev,
+                                  mask_dev);
+
+  std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
+  CUDA_CHECK(cudaMemcpy(&mask_host[0],
+                        mask_dev,
+                        sizeof(unsigned long long) * boxes_num * col_blocks,
+                        cudaMemcpyDeviceToHost));
+
+  std::vector<unsigned long long> remv(col_blocks);
+  memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
+
+  int num_to_keep = 0;
+  for (int i = 0; i < boxes_num; i++) {
+    int nblock = i / threadsPerBlock;
+    int inblock = i % threadsPerBlock;
+
+    if (!(remv[nblock] & (1ULL << inblock))) {
+      keep_out[num_to_keep++] = i;
+      unsigned long long *p = &mask_host[0] + i * col_blocks;
+      for (int j = nblock; j < col_blocks; j++) {
+        remv[j] |= p[j];
+      }
+    }
+  }
+  *num_out = num_to_keep;
+
+  CUDA_CHECK(cudaFree(boxes_dev));
+  CUDA_CHECK(cudaFree(mask_dev));
+}
diff --git a/lib/nms/setup_linux.py b/lib/nms/setup_linux.py
new file mode 100644
index 0000000..9120a93
--- /dev/null
+++ b/lib/nms/setup_linux.py
@@ -0,0 +1,141 @@
+# --------------------------------------------------------
+# Pose.gluon
+# Copyright (c) 2018-present Microsoft
+# Licensed under The MIT License [see LICENSE for details]
+# Modified from py-faster-rcnn (https://github.com/rbgirshick/py-faster-rcnn)
+# --------------------------------------------------------
+
+import os
+from os.path import join as pjoin
+from setuptools import setup
+from distutils.extension import Extension
+from Cython.Distutils import build_ext
+import numpy as np
+
+
+def find_in_path(name, path):
+    "Find a file in a search path"
+    # Adapted from
+    # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
+    for dir in path.split(os.pathsep):
+        binpath = pjoin(dir, name)
+        if os.path.exists(binpath):
+            return os.path.abspath(binpath)
+    return None
+
+
+def locate_cuda():
+    """Locate the CUDA environment on the system
+    Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
+    and values giving the absolute path to each directory.
+    Starts by looking for the CUDAHOME env variable. If not found, everything
+    is based on finding 'nvcc' in the PATH.
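+    Raises EnvironmentError if nvcc cannot be located (via $CUDAHOME or $PATH)
+    or if any of the expected CUDA subdirectories is missing.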
+ """ + + # first check if the CUDAHOME env variable is in use + if 'CUDAHOME' in os.environ: + home = os.environ['CUDAHOME'] + nvcc = pjoin(home, 'bin', 'nvcc') + else: + # otherwise, search the PATH for NVCC + default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') + nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) + if nvcc is None: + raise EnvironmentError('The nvcc binary could not be ' + 'located in your $PATH. Either add it to your path, or set $CUDAHOME') + home = os.path.dirname(os.path.dirname(nvcc)) + + cudaconfig = {'home':home, 'nvcc':nvcc, + 'include': pjoin(home, 'include'), + 'lib64': pjoin(home, 'lib64')} + for k, v in cudaconfig.items(): + if not os.path.exists(v): + raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) + + return cudaconfig +CUDA = locate_cuda() + + +# Obtain the numpy include directory. This logic works across numpy versions. +try: + numpy_include = np.get_include() +except AttributeError: + numpy_include = np.get_numpy_include() + + +def customize_compiler_for_nvcc(self): + """inject deep into distutils to customize how the dispatch + to gcc/nvcc works. + If you subclass UnixCCompiler, it's not trivial to get your subclass + injected in, and still have the right customizations (i.e. + distutils.sysconfig.customize_compiler) run on it. So instead of going + the OO route, I have this. Note, it's kindof like a wierd functional + subclassing going on.""" + + # tell the compiler it can processes .cu + self.src_extensions.append('.cu') + + # save references to the default compiler_so and _comple methods + default_compiler_so = self.compiler_so + super = self._compile + + # now redefine the _compile method. This gets executed for each + # object but distutils doesn't have the ability to change compilers + # based on source extension: we add it. 
+    def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
+        if os.path.splitext(src)[1] == '.cu':
+            # use nvcc for .cu files
+            self.set_executable('compiler_so', CUDA['nvcc'])
+            # use only a subset of the extra_postargs, which are 1-1 translated
+            # from the extra_compile_args in the Extension class
+            postargs = extra_postargs['nvcc']
+        else:
+            postargs = extra_postargs['gcc']
+
+        super(obj, src, ext, cc_args, postargs, pp_opts)
+        # reset the default compiler_so, which we might have changed for cuda
+        self.compiler_so = default_compiler_so
+
+    # inject our redefined _compile method into the class
+    self._compile = _compile
+
+
+# run the customize_compiler
+class custom_build_ext(build_ext):
+    def build_extensions(self):
+        customize_compiler_for_nvcc(self.compiler)
+        build_ext.build_extensions(self)
+
+
+ext_modules = [
+    Extension(
+        "cpu_nms",
+        ["cpu_nms.pyx"],
+        extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
+        include_dirs = [numpy_include]
+    ),
+    Extension('gpu_nms',
+        ['nms_kernel.cu', 'gpu_nms.pyx'],
+        library_dirs=[CUDA['lib64']],
+        libraries=['cudart'],
+        language='c++',
+        runtime_library_dirs=[CUDA['lib64']],
+        # this syntax is specific to this build system
+        # we're only going to use certain compiler args with nvcc and not with
+        # gcc; the implementation of this trick is in customize_compiler_for_nvcc() above
+        extra_compile_args={'gcc': ["-Wno-unused-function"],
+                            'nvcc': ['-arch=sm_35',
+                                     '--ptxas-options=-v',
+                                     '-c',
+                                     '--compiler-options',
+                                     "'-fPIC'"]},
+        include_dirs = [numpy_include, CUDA['include']]
+    ),
+]
+
+setup(
+    name='nms',
+    ext_modules=ext_modules,
+    # inject our custom trigger
+    cmdclass={'build_ext': custom_build_ext},
+)
diff --git a/lib/utils/__init__.py b/lib/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/lib/utils/assocembedutil.py b/lib/utils/assocembedutil.py
new file mode 100644
index 0000000..5e58acd
--- /dev/null
+++ b/lib/utils/assocembedutil.py
@@ -0,0 +1,376 @@
+import math
+import statistics
+import torch
+
+
+def localmax2D(data, min_thresh, min_dist):
+    """
+    Finds the local maxima for the given data tensor. All computationally intensive
+    operations are handled by pytorch on the same device as the given data tensor.
+
+    Parameters:
+        data (tensor): the tensor that we search for local maxima. This tensor must
+            have at least two dimensions (d1, d2, d3, ..., dn, rows, cols). Each 2D
+            (rows, cols) slice will be searched for local maxima. If neighboring pixels
+            have the same value we follow a simple tie breaking algorithm. The tie will
+            be broken by considering the pixel with the larger row index greater, or if the
+            rows are also equal, the pixel with the larger column index is considered
+            greater.
+
+        min_thresh (number): any pixels below this threshold will be excluded
+            from the local maxima search
+
+        min_dist (integer): minimum neighborhood size in pixels. We search a square
+            neighborhood around each pixel min_dist pixels out. If a pixel is the largest
+            value in this neighborhood we mark it as a local maximum. All pixels within
+            min_dist of the 2D boundary are excluded from the local maxima search. This
+            parameter must be >= 1.
+
+    Returns:
+        A boolean tensor with the same shape as data and on the same device. Elements
+        will be True where a local maximum was detected and False elsewhere.
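+
+    Example (hypothetical 12-joint heatmap stack):
+        >>> heatmaps = torch.rand(12, 64, 64)
+        >>> peaks = localmax2D(heatmaps, min_thresh=0.5, min_dist=2)
+        >>> peaks.shape
+        torch.Size([12, 64, 64])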
+ """ + + assert min_dist >= 1 + + data_size = list(data.size()) + + accum_mask = data >= min_thresh + + # mask out a frame around the 2D space the size of min_dist + accum_mask[..., :min_dist, :] = False + accum_mask[..., -min_dist:, :] = False + accum_mask[..., :min_dist] = False + accum_mask[..., -min_dist:] = False + + for row_offset in range(-min_dist, min_dist + 1): + for col_offset in range(-min_dist, min_dist + 1): + + # nothing to do if we're at 0, 0 + if row_offset != 0 or col_offset != 0: + + offset_data = data + if row_offset < 0: + offset_data = offset_data[..., :row_offset, :] + padding_size = data_size.copy() + padding_size[-2] = -row_offset + padding = offset_data.new_empty(padding_size) + + offset_data = torch.cat([padding, offset_data], -2) + elif row_offset > 0: + offset_data = offset_data[..., row_offset:, :] + padding_size = data_size.copy() + padding_size[-2] = row_offset + padding = offset_data.new_empty(padding_size) + + offset_data = torch.cat([offset_data, padding], -2) + + if col_offset < 0: + offset_data = offset_data[..., :col_offset] + padding_size = data_size.copy() + padding_size[-1] = -col_offset + padding = offset_data.new_empty(padding_size) + + offset_data = torch.cat([padding, offset_data], -1) + elif col_offset > 0: + offset_data = offset_data[..., col_offset:] + padding_size = data_size.copy() + padding_size[-1] = col_offset + padding = offset_data.new_empty(padding_size) + + offset_data = torch.cat([offset_data, padding], -1) + + # dominance will act as a "tie breaker" for pixels that have equal value + data_is_dominant = False + if row_offset != 0: + data_is_dominant = row_offset > 0 + else: + data_is_dominant = col_offset > 0 + + if data_is_dominant: + accum_mask &= data >= offset_data + else: + accum_mask &= data > offset_data + + return accum_mask + + +def flatten_indices(indices_2d, shape): + """ + This function will "flatten" the given index matrix such that it can be used + as input to pytorch's `take` function. 
+ """ + + # calculate the index multiplier vector that allows us to convert + # from vector indicies to flat indices + index_mult_vec = indices_2d.new_ones(len(shape)) + for i in range(len(shape) - 1): + index_mult_vec[ : i + 1] *= shape[i + 1] + + return torch.sum(indices_2d * index_mult_vec, dim=1) + + +def xy_dist(pt1, pt2): + x_diff = pt2['x_pos'] - pt1['x_pos'] + y_diff = pt2['y_pos'] - pt1['y_pos'] + + return math.sqrt(x_diff ** 2 + y_diff ** 2) + + +class PoseInstance(object): + + def __init__(self): + self.keypoints = dict() + self.instance_track_id = 0 + self._sum_inst_embed = 0 + self._sum_inst_conf = 0 + + @property + def mean_inst_embed(self): + return self._sum_inst_embed / len(self.keypoints) + + @property + def mean_inst_conf(self): + return self._sum_inst_conf / len(self.keypoints) + + def add_keypoint(self, keypoint): + + assert keypoint['joint_index'] not in self.keypoints + self.keypoints[keypoint['joint_index']] = keypoint + + self._sum_inst_embed += keypoint['embed'] + self._sum_inst_conf += keypoint['conf'] + + def nearest_dist(self, keypoint): + min_dist = None + for pose_keypoint in self.keypoints.values(): + curr_dist = xy_dist(keypoint, pose_keypoint) + + if min_dist is None or curr_dist < min_dist: + min_dist = curr_dist + + return min_dist + + def weighted_embed_dist(self, keypoint): + + sum_of_weights = 0 + sum_of_weighted_embed_dists = 0 + + for pose_keypoint in self.keypoints.values(): + curr_xy_dist = xy_dist(keypoint, pose_keypoint) + curr_embed_dist = abs(keypoint['embed'] - pose_keypoint['embed']) + if curr_xy_dist == 0: + return curr_embed_dist + + sum_of_weighted_embed_dists += curr_embed_dist / curr_xy_dist + sum_of_weights += 1.0 / curr_xy_dist + + assert sum_of_weights > 0 + + return sum_of_weighted_embed_dists / sum_of_weights + + @staticmethod + def from_xy_tensor(xy_tensor): + pi = PoseInstance() + pi.keypoints = { + i: {'x_pos': x_pos, 'y_pos': y_pos} + for i, (x_pos, y_pos) in enumerate(xy_tensor) + } + + return pi + + +def calc_pose_instances( + pose_heatmaps, + pose_localmax, + pose_embed_maps, + min_embed_sep_between, + max_embed_sep_within, + max_inst_dist): + + """ + Given the input parameters for a single image/frame, return a list + of PoseInstance objects + + Parameters: + pose_heatmaps (tensor): contains 2D heatmaps representing confidence + that a pose keypoint is detected at the respective 2D pixel locations. + The shape of this tensor should be (joint_count, pixel_rows, pixel_columns) + + pose_localmax (tensor): this is a boolean tensor with the same shape as + pose_heatmaps. Each true value locates a local maxima in pose_heatmaps + + pose_embed_maps (tensor): the same shape as pose_heatmaps. This tensor + contains instance embedding values as described in "Associative Embedding: + End-to-End Learning for Joint Detection and Grouping" (Newell et al.) + + min_embed_sep_between (number): minimum separation in the embedding space required + between instances that are in close proximity + + max_embed_sep_within (number): maximum separation in the embedding space allowed + within an instance + + max_inst_dist (number): the maximum distance is pixel units for neighboring + keypoints of an instance + + Returns: + A list of PoseInstace objects + """ + + joint_count = pose_heatmaps.size(0) + + pose_instances = [] + for joint_index in range(joint_count): + joint_localmax = pose_localmax[joint_index, ...] + + joint_xy = joint_localmax.nonzero().cpu() + joint_xy[...] 
= joint_xy[..., [1, 0]].clone() + + joint_embed = pose_embed_maps[joint_index, ...] + joint_embed = joint_embed[joint_localmax].cpu() + + pose_heatmap = pose_heatmaps[joint_index, ...] + pose_conf = pose_heatmap[joint_localmax].cpu() + + joint_insts = [] + for inst_index in range(joint_xy.size(0)): + joint_insts.append({ + 'joint_index': joint_index, + 'x_pos': joint_xy[inst_index, 0].item(), + 'y_pos': joint_xy[inst_index, 1].item(), + 'conf': pose_conf[inst_index].item(), + 'embed': joint_embed[inst_index].item(), + }) + + # Here we remove any keypoints that are both spatially too close and too + # close in the embedding space. In these cases the joint with higher confidence + # is kept and the other is discarded + joint_insts.sort(key=lambda j: j['conf']) + joint_insts_filtered = [] + for inst_index1, joint_inst1 in enumerate(joint_insts): + min_embed_sep_violated = False + for joint_inst2 in joint_insts[inst_index1 + 1:]: + if (abs(joint_inst1['embed'] - joint_inst2['embed']) < min_embed_sep_between + and xy_dist(joint_inst1, joint_inst2) <= max_inst_dist): + min_embed_sep_violated = True + break + + if not min_embed_sep_violated: + joint_insts_filtered.append(joint_inst1) + joint_insts_filtered.reverse() + joint_insts = joint_insts_filtered + + # we look at all valid combinations of joints with pose instances and + # we prioritize by embedding distance + candidate_keypoint_assignments = [] + for keypoint_index, curr_joint in enumerate(joint_insts): + for pose_index, curr_pose_instance in enumerate(pose_instances): + + max_inst_dist_violated = True + for pose_inst_pt in curr_pose_instance.keypoints.values(): + if xy_dist(curr_joint, pose_inst_pt) <= max_inst_dist: + max_inst_dist_violated = False + break + + #embedding_dist = abs(curr_pose_instance.mean_inst_embed - curr_joint['embed']) + embedding_dist = curr_pose_instance.weighted_embed_dist(curr_joint) + if not max_inst_dist_violated and embedding_dist < max_embed_sep_within: + candidate_keypoint_assignments.append( + (pose_index, keypoint_index, embedding_dist)) + + unassigned_keypoint_indexes = set(range(len(joint_insts))) + candidate_keypoint_assignments.sort(key=lambda x: x[2]) + for pose_index, keypoint_index, embedding_dist in candidate_keypoint_assignments: + curr_pose_instance = pose_instances[pose_index] + if (keypoint_index in unassigned_keypoint_indexes + and joint_index not in curr_pose_instance.keypoints): + curr_pose_instance.add_keypoint(joint_insts[keypoint_index]) + unassigned_keypoint_indexes.remove(keypoint_index) + + for keypoint_index in unassigned_keypoint_indexes: + pose_instance = PoseInstance() + pose_instance.add_keypoint(joint_insts[keypoint_index]) + pose_instances.append(pose_instance) + + # # TODO pick one of these two if/else blocks and delete the other + # if False: + # for joint_inst in joint_insts: + # best_pose_match = None + # best_embed_diff = None + + # # find nearest instance in embedding space + # for pose_instance in pose_instances: + # if joint_index not in pose_instance.keypoints: + # embed_diff = abs(joint_inst['embed'] - pose_instance.mean_inst_embed) + # if best_embed_diff is None or embed_diff < best_embed_diff: + # spatial_dist = pose_instance.nearest_dist(joint_inst) + # if spatial_dist <= max_inst_dist: + # best_pose_match = pose_instance + # best_embed_diff = embed_diff + + # if best_pose_match is None: + # # since there's no existing pose match create a new one + # best_pose_match = PoseInstance() + # pose_instances.append(best_pose_match) + + # 
best_pose_match.add_keypoint(joint_inst) + # else: + # for pose_instance in pose_instances: + # best_keypoint_index = None + # best_embed_diff = None + + # for keypoint_index, joint_inst in enumerate(joint_insts): + # embed_diff = abs(joint_inst['embed'] - pose_instance.mean_inst_embed) + # if best_embed_diff is None or embed_diff < best_embed_diff: + # spatial_dist = pose_instance.nearest_dist(joint_inst) + # if spatial_dist <= max_inst_dist: + # best_keypoint_index = keypoint_index + # best_embed_diff = embed_diff + + # if best_keypoint_index is not None: + # best_keypoint = joint_insts[best_keypoint_index] + # del joint_insts[best_keypoint_index] + # pose_instance.add_keypoint(best_keypoint) + + # for joint_inst in joint_insts: + # pose_instance = PoseInstance() + # pose_instance.add_keypoint(joint_inst) + # pose_instances.append(pose_instance) + + return pose_instances + + +def pose_distance(pose1, pose2): + + """ + Calculate an averaged pixel distance between two poses that can be used for pose tracking. + """ + + # TODO if this isn't good enough we should correct distance using keypoint speed + # to estimate position + + # total_distance = 0 + # point_count = 0 + + # for joint_index, pose1_keypoint in pose1.keypoints.items(): + # if joint_index in pose2.keypoints: + # pose2_keypoint = pose2.keypoints[joint_index] + # total_distance += xy_dist(pose1_keypoint, pose2_keypoint) + # point_count += 1 + + # if point_count >= 1: + # return total_distance / point_count + # else: + # return math.inf + + point_dists = [] + + for joint_index, pose1_keypoint in pose1.keypoints.items(): + if joint_index in pose2.keypoints: + pose2_keypoint = pose2.keypoints[joint_index] + point_dists.append(xy_dist(pose1_keypoint, pose2_keypoint)) + + if point_dists: + return statistics.median(point_dists) + else: + return math.inf diff --git a/lib/utils/transforms.py b/lib/utils/transforms.py new file mode 100644 index 0000000..6b12f44 --- /dev/null +++ b/lib/utils/transforms.py @@ -0,0 +1,121 @@ +# ------------------------------------------------------------------------------ +# Copyright (c) Microsoft +# Licensed under the MIT License. 
+# Written by Bin Xiao (Bin.Xiao@microsoft.com)
+# ------------------------------------------------------------------------------
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import cv2
+
+
+def flip_back(output_flipped, matched_parts):
+    '''
+    output_flipped: numpy.ndarray(batch_size, num_joints, height, width)
+    '''
+    assert output_flipped.ndim == 4,\
+        'output_flipped should be [batch_size, num_joints, height, width]'
+
+    output_flipped = output_flipped[:, :, :, ::-1]
+
+    for pair in matched_parts:
+        tmp = output_flipped[:, pair[0], :, :].copy()
+        output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :]
+        output_flipped[:, pair[1], :, :] = tmp
+
+    return output_flipped
+
+
+def fliplr_joints(joints, joints_vis, width, matched_parts):
+    """
+    flip coords
+    """
+    # Flip horizontal
+    joints[:, 0] = width - joints[:, 0] - 1
+
+    # Change left-right parts
+    for pair in matched_parts:
+        joints[pair[0], :], joints[pair[1], :] = \
+            joints[pair[1], :], joints[pair[0], :].copy()
+        joints_vis[pair[0], :], joints_vis[pair[1], :] = \
+            joints_vis[pair[1], :], joints_vis[pair[0], :].copy()
+
+    return joints*joints_vis, joints_vis
+
+
+def transform_preds(coords, center, scale, output_size):
+    target_coords = np.zeros(coords.shape)
+    trans = get_affine_transform(center, scale, 0, output_size, inv=1)
+    for p in range(coords.shape[0]):
+        target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans)
+    return target_coords
+
+
+def get_affine_transform(
+        center, scale, rot, output_size,
+        shift=np.array([0, 0], dtype=np.float32), inv=0
+):
+    if not isinstance(scale, np.ndarray) and not isinstance(scale, list):
+        print(scale)
+        scale = np.array([scale, scale])
+
+    scale_tmp = scale * 200.0
+    src_w = scale_tmp[0]
+    dst_w = output_size[0]
+    dst_h = output_size[1]
+
+    rot_rad = np.pi * rot / 180
+    src_dir = get_dir([0, src_w * -0.5], rot_rad)
+    dst_dir = np.array([0, dst_w * -0.5], np.float32)
+
+    src = np.zeros((3, 2), dtype=np.float32)
+    dst = np.zeros((3, 2), dtype=np.float32)
+    src[0, :] = center + scale_tmp * shift
+    src[1, :] = center + src_dir + scale_tmp * shift
+    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
+    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
+
+    src[2:, :] = get_3rd_point(src[0, :], src[1, :])
+    dst[2:, :] = get_3rd_point(dst[0, :], dst[1, :])
+
+    if inv:
+        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
+    else:
+        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
+
+    return trans
+
+
+def affine_transform(pt, t):
+    new_pt = np.array([pt[0], pt[1], 1.]).T
+    new_pt = np.dot(t, new_pt)
+    return new_pt[:2]
+
+
+def get_3rd_point(a, b):
+    direct = a - b
+    return b + np.array([-direct[1], direct[0]], dtype=np.float32)
+
+
+def get_dir(src_point, rot_rad):
+    sn, cs = np.sin(rot_rad), np.cos(rot_rad)
+
+    src_result = [0, 0]
+    src_result[0] = src_point[0] * cs - src_point[1] * sn
+    src_result[1] = src_point[0] * sn + src_point[1] * cs
+
+    return src_result
+
+
+def crop(img, center, scale, output_size, rot=0):
+    trans = get_affine_transform(center, scale, rot, output_size)
+
+    dst_img = cv2.warpAffine(
+        img, trans, (int(output_size[0]), int(output_size[1])),
+        flags=cv2.INTER_LINEAR
+    )
+
+    return dst_img
diff --git a/lib/utils/utils.py b/lib/utils/utils.py
new file mode 100644
index 0000000..5a3b7da
--- /dev/null
+++ b/lib/utils/utils.py
@@ -0,0 +1,204 @@
+# ------------------------------------------------------------------------------
+# Copyright (c)
Microsoft +# Licensed under the MIT License. +# Written by Bin Xiao (Bin.Xiao@microsoft.com) +# ------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import logging +import time +from collections import namedtuple +from pathlib import Path + +import torch +import torch.optim as optim +import torch.nn as nn + + +def create_logger(cfg, cfg_name, phase='train'): + root_output_dir = Path(cfg.OUTPUT_DIR) + # set up logger + if not root_output_dir.exists(): + print('=> creating {}'.format(root_output_dir)) + root_output_dir.mkdir(exist_ok=True) + + dataset = cfg.DATASET.DATASET + '_' + cfg.DATASET.HYBRID_JOINTS_TYPE \ + if cfg.DATASET.HYBRID_JOINTS_TYPE else cfg.DATASET.DATASET + dataset = dataset.replace(':', '_') + model = cfg.MODEL.NAME + cfg_name = os.path.basename(cfg_name).split('.')[0] + + final_output_dir = root_output_dir / dataset / model / cfg_name + + print('=> creating {}'.format(final_output_dir)) + final_output_dir.mkdir(parents=True, exist_ok=True) + + time_str = time.strftime('%Y-%m-%d-%H-%M') + log_file = '{}_{}_{}.log'.format(cfg_name, time_str, phase) + final_log_file = final_output_dir / log_file + head = '%(asctime)-15s %(message)s' + logging.basicConfig(filename=str(final_log_file), + format=head) + logger = logging.getLogger() + logger.setLevel(logging.INFO) + console = logging.StreamHandler() + logging.getLogger('').addHandler(console) + + tensorboard_log_dir = Path(cfg.LOG_DIR) / dataset / model / \ + (cfg_name + '_' + time_str) + + print('=> creating {}'.format(tensorboard_log_dir)) + tensorboard_log_dir.mkdir(parents=True, exist_ok=True) + + return logger, str(final_output_dir), str(tensorboard_log_dir) + + +def get_optimizer(cfg, model): + optimizer = None + if cfg.TRAIN.OPTIMIZER == 'sgd': + optimizer = optim.SGD( + model.parameters(), + lr=cfg.TRAIN.LR, + momentum=cfg.TRAIN.MOMENTUM, + weight_decay=cfg.TRAIN.WD, + nesterov=cfg.TRAIN.NESTEROV, + ) + elif cfg.TRAIN.OPTIMIZER == 'adam': + optimizer = optim.Adam( + model.parameters(), + lr=cfg.TRAIN.LR, + weight_decay=cfg.TRAIN.WD, + ) + + return optimizer + + +def save_checkpoint(states, is_best, output_dir, + filename='checkpoint.pth'): + torch.save(states, os.path.join(output_dir, filename)) + if is_best and 'state_dict' in states: + torch.save(states['best_state_dict'], + os.path.join(output_dir, 'model_best.pth')) + + +def get_model_summary(model, *input_tensors, item_length=26, verbose=False): + """ + :param model: + :param input_tensors: + :param item_length: + :return: + """ + + summary = [] + + ModuleDetails = namedtuple( + "Layer", ["name", "input_size", "output_size", "num_parameters", "multiply_adds"]) + hooks = [] + layer_instances = {} + + def add_hooks(module): + + def hook(module, input, output): + class_name = str(module.__class__.__name__) + + instance_index = 1 + if class_name not in layer_instances: + layer_instances[class_name] = instance_index + else: + instance_index = layer_instances[class_name] + 1 + layer_instances[class_name] = instance_index + + layer_name = class_name + "_" + str(instance_index) + + params = 0 + + if class_name.find("Conv") != -1 or class_name.find("BatchNorm") != -1 or \ + class_name.find("Linear") != -1: + for param_ in module.parameters(): + params += param_.view(-1).size(0) + + flops = "Not Available" + if class_name.find("Conv") != -1 and hasattr(module, "weight"): + flops = ( + torch.prod( + 
torch.LongTensor(list(module.weight.data.size()))) * + torch.prod( + torch.LongTensor(list(output.size())[2:]))).item() + elif isinstance(module, nn.Linear): + flops = (torch.prod(torch.LongTensor(list(output.size()))) \ + * input[0].size(1)).item() + + if isinstance(input[0], list): + input = input[0] + if isinstance(output, list): + output = output[0] + + summary.append( + ModuleDetails( + name=layer_name, + input_size=list(input[0].size()), + output_size=list(output.size()), + num_parameters=params, + multiply_adds=flops) + ) + + if not isinstance(module, nn.ModuleList) \ + and not isinstance(module, nn.Sequential) \ + and module != model: + hooks.append(module.register_forward_hook(hook)) + + model.eval() + model.apply(add_hooks) + + space_len = item_length + + model(*input_tensors) + for hook in hooks: + hook.remove() + + details = '' + if verbose: + details = "Model Summary" + \ + os.linesep + \ + "Name{}Input Size{}Output Size{}Parameters{}Multiply Adds (Flops){}".format( + ' ' * (space_len - len("Name")), + ' ' * (space_len - len("Input Size")), + ' ' * (space_len - len("Output Size")), + ' ' * (space_len - len("Parameters")), + ' ' * (space_len - len("Multiply Adds (Flops)"))) \ + + os.linesep + '-' * space_len * 5 + os.linesep + + params_sum = 0 + flops_sum = 0 + for layer in summary: + params_sum += layer.num_parameters + if layer.multiply_adds != "Not Available": + flops_sum += layer.multiply_adds + if verbose: + details += "{}{}{}{}{}{}{}{}{}{}".format( + layer.name, + ' ' * (space_len - len(layer.name)), + layer.input_size, + ' ' * (space_len - len(str(layer.input_size))), + layer.output_size, + ' ' * (space_len - len(str(layer.output_size))), + layer.num_parameters, + ' ' * (space_len - len(str(layer.num_parameters))), + layer.multiply_adds, + ' ' * (space_len - len(str(layer.multiply_adds)))) \ + + os.linesep + '-' * space_len * 5 + os.linesep + + details += os.linesep \ + + "Total Parameters: {:,}".format(params_sum) \ + + os.linesep + '-' * space_len * 5 + os.linesep + details += "Total Multiply Adds (For Convolution and Linear Layers only): {:,} GFLOPs".format(flops_sum/(1024**3)) \ + + os.linesep + '-' * space_len * 5 + os.linesep + details += "Number of Layers" + os.linesep + for layer in layer_instances: + details += "{} : {} layers ".format(layer, layer_instances[layer]) + + return details diff --git a/lib/utils/vis.py b/lib/utils/vis.py new file mode 100755 index 0000000..adc0947 --- /dev/null +++ b/lib/utils/vis.py @@ -0,0 +1,141 @@ +# ------------------------------------------------------------------------------ +# Copyright (c) Microsoft +# Licensed under the MIT License. 
+# Written by Bin Xiao (Bin.Xiao@microsoft.com) +# ------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import math + +import numpy as np +import torchvision +import cv2 + +from core.inference import get_max_preds + + +def save_batch_image_with_joints(batch_image, batch_joints, batch_joints_vis, + file_name, nrow=8, padding=2): + ''' + batch_image: [batch_size, channel, height, width] + batch_joints: [batch_size, num_joints, 3], + batch_joints_vis: [batch_size, num_joints, 1], + } + ''' + grid = torchvision.utils.make_grid(batch_image, nrow, padding, True) + ndarr = grid.mul(255).clamp(0, 255).byte().permute(1, 2, 0).cpu().numpy() + ndarr = ndarr.copy() + + nmaps = batch_image.size(0) + xmaps = min(nrow, nmaps) + ymaps = int(math.ceil(float(nmaps) / xmaps)) + height = int(batch_image.size(2) + padding) + width = int(batch_image.size(3) + padding) + k = 0 + for y in range(ymaps): + for x in range(xmaps): + if k >= nmaps: + break + joints = batch_joints[k] + joints_vis = batch_joints_vis[k] + + for joint, joint_vis in zip(joints, joints_vis): + joint[0] = x * width + padding + joint[0] + joint[1] = y * height + padding + joint[1] + if joint_vis[0]: + cv2.circle(ndarr, (int(joint[0]), int(joint[1])), 2, [255, 0, 0], 2) + k = k + 1 + cv2.imwrite(file_name, ndarr) + + +def save_batch_heatmaps(batch_image, batch_heatmaps, file_name, + normalize=True): + ''' + batch_image: [batch_size, channel, height, width] + batch_heatmaps: ['batch_size, num_joints, height, width] + file_name: saved file name + ''' + if normalize: + batch_image = batch_image.clone() + min = float(batch_image.min()) + max = float(batch_image.max()) + + batch_image.add_(-min).div_(max - min + 1e-5) + + batch_size = batch_heatmaps.size(0) + num_joints = batch_heatmaps.size(1) + heatmap_height = batch_heatmaps.size(2) + heatmap_width = batch_heatmaps.size(3) + + grid_image = np.zeros((batch_size*heatmap_height, + (num_joints+1)*heatmap_width, + 3), + dtype=np.uint8) + + preds, maxvals = get_max_preds(batch_heatmaps.detach().cpu().numpy()) + + for i in range(batch_size): + image = batch_image[i].mul(255)\ + .clamp(0, 255)\ + .byte()\ + .permute(1, 2, 0)\ + .cpu().numpy() + heatmaps = batch_heatmaps[i].mul(255)\ + .clamp(0, 255)\ + .byte()\ + .cpu().numpy() + + resized_image = cv2.resize(image, + (int(heatmap_width), int(heatmap_height))) + + height_begin = heatmap_height * i + height_end = heatmap_height * (i + 1) + for j in range(num_joints): + cv2.circle(resized_image, + (int(preds[i][j][0]), int(preds[i][j][1])), + 1, [0, 0, 255], 1) + heatmap = heatmaps[j, :, :] + colored_heatmap = cv2.applyColorMap(heatmap, cv2.COLORMAP_JET) + masked_image = colored_heatmap*0.7 + resized_image*0.3 + cv2.circle(masked_image, + (int(preds[i][j][0]), int(preds[i][j][1])), + 1, [0, 0, 255], 1) + + width_begin = heatmap_width * (j+1) + width_end = heatmap_width * (j+2) + grid_image[height_begin:height_end, width_begin:width_end, :] = \ + masked_image + # grid_image[height_begin:height_end, width_begin:width_end, :] = \ + # colored_heatmap*0.7 + resized_image*0.3 + + grid_image[height_begin:height_end, 0:heatmap_width, :] = resized_image + + cv2.imwrite(file_name, grid_image) + + +def save_debug_images(config, input, meta, target, joints_pred, output, + prefix): + if not config.DEBUG.DEBUG: + return + + if config.DEBUG.SAVE_BATCH_IMAGES_GT: + save_batch_image_with_joints( + input, meta['joints'], 
meta['joints_vis'], + '{}_gt.jpg'.format(prefix) + ) + if config.DEBUG.SAVE_BATCH_IMAGES_PRED: + save_batch_image_with_joints( + input, joints_pred, meta['joints_vis'], + '{}_pred.jpg'.format(prefix) + ) + if config.DEBUG.SAVE_HEATMAPS_GT: + save_batch_heatmaps( + input, target, '{}_hm_gt.jpg'.format(prefix) + ) + if config.DEBUG.SAVE_HEATMAPS_PRED: + save_batch_heatmaps( + input, output, '{}_hm_pred.jpg'.format(prefix) + ) diff --git a/lib/utils/xform.py b/lib/utils/xform.py new file mode 100644 index 0000000..eeaef8b --- /dev/null +++ b/lib/utils/xform.py @@ -0,0 +1,69 @@ +import cv2 +import numpy as np + +# while this code logically belongs in transforms.py, I put it in +# its own module to minimize the number of modifications I've made +# to the original HResNet code. + + +def centered_transform_mat(center_xy, rot_deg, scale, out_wh): + half_width = out_wh[0] / 2.0 + half_height = out_wh[1] / 2.0 + translate_mat = np.float32([ + [1.0, 0.0, -center_xy[0] + half_width], + [0.0, 1.0, -center_xy[1] + half_height], + [0.0, 0.0, 1.0], + ]) + + rot_rad = rot_deg * np.pi / 180 + alpha = scale * np.cos(rot_rad) + beta = scale * np.sin(rot_rad) + rot_scale_mat = np.float32([ + [alpha, beta, (1 - alpha) * half_width - beta * half_height], + [-beta, alpha, beta * half_width + (1 - alpha) * half_height], + [0.0, 0.0, 1.0], + ]) + + return rot_scale_mat @ translate_mat + + +def random_occlusion(img, max_occlusion_size, opacity): + + assert img.ndim == 2 or img.ndim == 3 + + nchan = 0 + if img.ndim == 2: + img_height, img_width = img.shape + elif img.ndim == 3: + nchan, img_height, img_width = img.shape + + occ_center_x = np.random.rand() * img_width + occ_center_y = np.random.rand() * img_height + + if np.random.rand() < 0.5: + occ_min_x = occ_center_x - max_occlusion_size / 2 + occ_min_y = occ_center_y - max_occlusion_size / 2 + + random_points = ( + np.random.rand(1, np.random.randint(3, 7), 2) * max_occlusion_size + + np.array([[[occ_min_x, occ_min_y]]]) + ) + random_points = random_points.astype(np.int32) + + mask = np.zeros([img_height, img_width], dtype=np.uint8) + cv2.fillPoly(mask, random_points, 255) + else: + mask = np.zeros([img_height, img_width], dtype=np.uint8) + cv2.ellipse( + mask, + (int(occ_center_x), int(occ_center_y)), + (int(max_occlusion_size * np.random.rand() / 2), int(max_occlusion_size * np.random.rand() / 2)), + np.random.randint(0, 359), + 0, 360, 255, -1) + mask = mask.astype(np.bool) + if img.ndim == 3: + mask = np.stack([mask] * nchan) + + rand_shade = np.random.randint(0, 255) + img_float = img.astype(np.float32) + img[mask] = img_float[mask] * (1 - opacity) + rand_shade * opacity diff --git a/lib/utils/zipreader.py b/lib/utils/zipreader.py new file mode 100644 index 0000000..dab919f --- /dev/null +++ b/lib/utils/zipreader.py @@ -0,0 +1,70 @@ +# ------------------------------------------------------------------------------ +# Copyright (c) Microsoft +# Licensed under the MIT License. 
+# Written by Bin Xiao (Bin.Xiao@microsoft.com)
+# ------------------------------------------------------------------------------
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import zipfile
+import xml.etree.ElementTree as ET
+
+import cv2
+import numpy as np
+
+_im_zfile = []
+_xml_path_zip = []
+_xml_zfile = []
+
+
+def imread(filename, flags=cv2.IMREAD_COLOR):
+    global _im_zfile
+    path = filename
+    pos_at = path.find('@')
+    if pos_at == -1:
+        print("character '@' is not found in the given path '%s'" % (path))
+        assert 0
+    path_zip = path[0: pos_at]
+    path_img = path[pos_at + 2:]
+    if not os.path.isfile(path_zip):
+        print("zip file '%s' is not found" % (path_zip))
+        assert 0
+    for i in range(len(_im_zfile)):
+        if _im_zfile[i]['path'] == path_zip:
+            data = _im_zfile[i]['zipfile'].read(path_img)
+            return cv2.imdecode(np.frombuffer(data, np.uint8), flags)
+
+    _im_zfile.append({
+        'path': path_zip,
+        'zipfile': zipfile.ZipFile(path_zip, 'r')
+    })
+    data = _im_zfile[-1]['zipfile'].read(path_img)
+
+    return cv2.imdecode(np.frombuffer(data, np.uint8), flags)
+
+
+def xmlread(filename):
+    global _xml_path_zip
+    global _xml_zfile
+    path = filename
+    pos_at = path.find('@')
+    if pos_at == -1:
+        print("character '@' is not found in the given path '%s'" % (path))
+        assert 0
+    path_zip = path[0: pos_at]
+    path_xml = path[pos_at + 2:]
+    if not os.path.isfile(path_zip):
+        print("zip file '%s' is not found" % (path_zip))
+        assert 0
+    for i in range(len(_xml_path_zip)):
+        if _xml_path_zip[i] == path_zip:
+            data = _xml_zfile[i].open(path_xml)
+            return ET.fromstring(data.read())
+    _xml_path_zip.append(path_zip)
+    print("read new xml file '%s'" % (path_zip))
+    _xml_zfile.append(zipfile.ZipFile(path_zip, 'r'))
+    data = _xml_zfile[-1].open(path_xml)
+    return ET.fromstring(data.read())
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..980d1a4
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,19 @@
+Pillow
+EasyDict
+opencv-python
+shapely
+Cython
+scipy
+pandas
+pyyaml
+json_tricks
+scikit-image
+yacs
+imageio-ffmpeg
+h5py
+tensorboard
+future
+# Cluster needs CUDA 9.2 version so we install separately as:
+# pip3 install torch==1.3.0+cu92 torchvision==0.4.1+cu92 -f https://download.pytorch.org/whl/torch_stable.html
+torch
+torchvision
diff --git a/samplevids/samplevidsbatch.sh b/samplevids/samplevidsbatch.sh
new file mode 100755
index 0000000..7d865bd
--- /dev/null
+++ b/samplevids/samplevidsbatch.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+#
+#SBATCH --job-name=sample-vids
+#
+#SBATCH --time=6:00:00
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=4
+#SBATCH --qos=batch
+#SBATCH --mem=8G
+#SBATCH --nice
+
+trim_sp() {
+    local var="$*"
+    # remove leading whitespace characters
+    var="${var#"${var%%[![:space:]]*}"}"
+    # remove trailing whitespace characters
+    var="${var%"${var##*[![:space:]]}"}"
+    echo -n "$var"
+}
+
+export PATH="/opt/singularity/bin:${PATH}"
+if [[ -n "${SLURM_JOB_ID}" ]]
+then
+    # the script is being run by slurm
+    if [[ -n "${SLURM_ARRAY_TASK_ID}" ]]
+    then
+        if [[ -n "${BATCH_FILE}" ]]
+        then
+            # here we use the array ID to pull out the right video
+            VIDEO_FILE=$(trim_sp $(sed -n "${SLURM_ARRAY_TASK_ID}{p;q;}" < "${BATCH_FILE}"))
+            echo "BEGIN PROCESSING: ${VIDEO_FILE}"
+            echo "DUMP OF CURRENT ENVIRONMENT:"
+            env
+
+            cd "$(dirname "${BATCH_FILE}")"
+            mkdir -p 'vids'
+            mkdir -p 'frames'
+
+            rclone copy --include "${VIDEO_FILE}" "labdropbox:/KumarLab's shared workspace/VideoData/MDS_Tests" vids
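+            # NOTE: rclone's --include pattern is matched relative to the
+            # source directory, so ${VIDEO_FILE} must be a path relative to
+            # the MDS_Tests folder. A hedged, untested sketch of how a flaky
+            # share could be retried (same command, just wrapped in a loop):
+            #
+            #     for attempt in 1 2 3
+            #     do
+            #         rclone copy --include "${VIDEO_FILE}" \
+            #             "labdropbox:/KumarLab's shared workspace/VideoData/MDS_Tests" vids \
+            #             && break
+            #         echo "copy attempt ${attempt} failed; retrying" >&2
+            #         sleep 30
+            #     done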
+ + if [[ -f "vids/${VIDEO_FILE}" ]] + then + module load singularity + singularity exec "/projects/kumar-lab/USERS/sheppk/poseest-env/multi-mouse-pose-2020-02-12.sif" bash -c "python3 sampleframes.py --videos 'vids/${VIDEO_FILE}' --root-dir vids --outdir frames --neighbor-frame-count 5 --mark-frame" + rm "vids/${VIDEO_FILE}" + + echo "FINISHED PROCESSING: ${VIDEO_FILE}" + else + echo "ERROR: could not find video file: ${VIDEO_FILE}" >&2 + fi + else + echo "ERROR: the BATCH_FILE environment variable is not defined" >&2 + fi + else + echo "ERROR: no SLURM_ARRAY_TASK_ID found" >&2 + fi +else + # the script is being run from command line. We should do a self-submit as an array job + if [[ -f "${1}" ]] + then + # echo "${1} is set and not empty" + echo "Preparing to submit batch file: ${1}" + test_count=$(wc -l < "${1}") + echo "Submitting an array job for ${test_count} videos" + + # Here we perform a self-submit + sbatch --export=ROOT_DIR="$(dirname "${0}")",BATCH_FILE="${1}" --array="1-${test_count}%24" "${0}" + else + echo "ERROR: you need to provide a batch file to process. Eg: ./samplevidsbatch batchfile.txt" >&2 + exit 1 + fi +fi diff --git a/test-multi-mouse-pose.sh b/test-multi-mouse-pose.sh new file mode 100755 index 0000000..9288edc --- /dev/null +++ b/test-multi-mouse-pose.sh @@ -0,0 +1,166 @@ +#!/bin/bash + +# for((i=1; i<7; i++)) +# do +# python -u tools/testmultimouseinference.py \ +# --cfg experiments/multimouse/multimouse-${i}.yaml \ +# --model-file output-multi-mouse/multimousepose/pose_hrnet/multimouse-${i}/best_state.pth \ +# --cvat-files data/multi-mouse/Annotations/*.xml data/multi-mouse/Annotations_NoMarkings/*.xml \ +# --image-dir data/multi-mouse/Dataset \ +# --image-list data/multi-mouse-val-set.txt \ +# --max-embed-sep-within-instances 0.3 \ +# --min-embed-sep-between-instances 0.3 \ +# --plot-heatmap \ +# --image-out-dir temp/multimouse-${i} \ +# --dist-out-file output-multi-mouse/dist-out.txt +# done + +# for((i=1; i<11; i++)) +# do +# python -u tools/testmultimouseinference.py \ +# --cfg experiments/multimouse/multimouse_2019-11-19_${i}.yaml \ +# --model-file output-multi-mouse/multimousepose/pose_hrnet/multimouse_2019-11-19_${i}/best_state.pth \ +# --cvat-files data/multi-mouse/Annotations/*.xml data/multi-mouse/Annotations_NoMarkings/*.xml \ +# --image-dir data/multi-mouse/Dataset \ +# --image-list data/multi-mouse-val-set.txt \ +# --max-embed-sep-within-instances 0.3 \ +# --min-embed-sep-between-instances 0.3 \ +# --plot-heatmap \ +# --image-out-dir temp/multimouse_2019-11-19_${i} \ +# --dist-out-file output-multi-mouse/dist-out.txt +# done + +# for((i=1; i<9; i++)) +# do +# python -u tools/testmultimouseinference.py \ +# --cfg experiments/multimouse/multimouse_2019-12-19_${i}.yaml \ +# --model-file output-multi-mouse/multimousepose/pose_hrnet/multimouse_2019-12-19_${i}/best_state.pth \ +# --cvat-files data/multi-mouse/Annotations/*.xml data/multi-mouse/Annotations_NoMarkings/*.xml \ +# --image-dir data/multi-mouse/Dataset \ +# --image-list data/multi-mouse-val-set.txt \ +# --max-embed-sep-within-instances 0.3 \ +# --min-embed-sep-between-instances 0.3 \ +# --plot-heatmap \ +# --image-out-dir temp/multimouse_2019-12-19_${i} \ +# --dist-out-file output-multi-mouse/dist-out.txt +# done + +# for((i=1; i<2; i++)) +# do +# python -u tools/testmultimouseinference.py \ +# --cfg experiments/multimouse/multimouse_2019-12-31_${i}.yaml \ +# --model-file output-multi-mouse/multimousepose/pose_hrnet/multimouse_2019-12-31_${i}/best_state.pth \ +# --cvat-files 
data/multi-mouse/Annotations/*.xml data/multi-mouse/Annotations_NoMarkings/*.xml \ +# --image-dir data/multi-mouse/Dataset \ +# --image-list data/multi-mouse-val-set.txt \ +# --max-embed-sep-within-instances 0.3 \ +# --min-embed-sep-between-instances 0.3 \ +# --plot-heatmap \ +# --image-out-dir temp/multimouse_2019-12-31_${i} \ +# --dist-out-file output-multi-mouse/dist-out.txt +# done + +# for((i=1; i<17; i++)) +# do +# python -u tools/testmultimouseinference.py \ +# --cfg experiments/multimouse/multimouse_2020-01-17_$(printf %02d $i).yaml \ +# --model-file output-multi-mouse/multimousepose/pose_hrnet/multimouse_2020-01-17_$(printf %02d $i)/best_state.pth \ +# --cvat-files data/multi-mouse/Annotations/*.xml data/multi-mouse/Annotations_NoMarkings/*.xml \ +# --image-dir data/multi-mouse/Dataset \ +# --image-list data/multi-mouse-val-set.txt \ +# --max-embed-sep-within-instances 0.3 \ +# --min-embed-sep-between-instances 0.3 \ +# --min-pose-heatmap-val 1.0 \ +# --plot-heatmap \ +# --image-out-dir temp/multimouse_2020-01-17_$(printf %02d $i) \ +# --dist-out-file output-multi-mouse/dist-out.txt +# done + +# for((i=1; i<10; i++)) +# do +# python -u tools/testmultimouseinference.py \ +# --cfg experiments/multimouse/multimouse_2020-01-21_$(printf %02d $i).yaml \ +# --model-file output-multi-mouse/multimousepose/pose_hrnet/multimouse_2020-01-21_$(printf %02d $i)/best_state.pth \ +# --cvat-files data/multi-mouse/Annotations/*.xml data/multi-mouse/Annotations_NoMarkings/*.xml \ +# --image-dir data/multi-mouse/Dataset \ +# --image-list data/multi-mouse-val-set.txt \ +# --max-embed-sep-within-instances 0.3 \ +# --min-embed-sep-between-instances 0.3 \ +# --min-pose-heatmap-val 1.0 \ +# --plot-heatmap \ +# --image-out-dir temp/multimouse_2020-01-21_$(printf %02d $i) \ +# --dist-out-file output-multi-mouse/dist-out.txt +# done + +# for((i=1; i<13; i++)) +# do +# python -u tools/testmultimouseinference.py \ +# --cfg experiments/multimouse/multimouse_2020-01-22_$(printf %02d $i).yaml \ +# --model-file output-multi-mouse/multimousepose/pose_hrnet/multimouse_2020-01-22_$(printf %02d $i)/best_state.pth \ +# --cvat-files data/multi-mouse/Annotations/*.xml data/multi-mouse/Annotations_NoMarkings/*.xml \ +# --image-dir data/multi-mouse/Dataset \ +# --image-list data/multi-mouse-val-set.txt \ +# --max-embed-sep-within-instances 0.3 \ +# --min-embed-sep-between-instances 0.3 \ +# --min-pose-heatmap-val 1.0 \ +# --plot-heatmap \ +# --image-out-dir temp/multimouse_2020-01-22_$(printf %02d $i) \ +# --dist-out-file output-multi-mouse/dist-out.txt +# done + +# for((i=1; i<8; i++)) +# do +# # python -u tools/testmultimouseinference.py \ +# # --cfg experiments/multimouse/multimouse_2020-01-30_$(printf %02d $i).yaml \ +# # --model-file output-multi-mouse/multimousepose/pose_hrnet/multimouse_2020-01-30_$(printf %02d $i)/best_state.pth \ +# # --cvat-files data/multi-mouse/Annotations/*.xml data/multi-mouse/Annotations_NoMarkings/*.xml \ +# # --image-dir data/multi-mouse/Dataset \ +# # --image-list data/multi-mouse-val-set.txt \ +# # --max-embed-sep-within-instances 0.3 \ +# # --min-embed-sep-between-instances 0.3 \ +# # --min-pose-heatmap-val 1.0 \ +# # --plot-heatmap \ +# # --image-out-dir temp/multimouse_2020-01-30_$(printf %02d $i) \ +# # --dist-out-file output-multi-mouse/dist-out.txt +# python -u tools/testmultimouseinference.py \ +# --cfg experiments/multimouse/multimouse_2020-01-30_$(printf %02d $i).yaml \ +# --model-file output-multi-mouse/multimousepose/pose_hrnet/multimouse_2020-01-30_$(printf %02d 
$i)/best_state.pth \ +# --cvat-files data/multi-mouse/Annotations/*.xml data/multi-mouse/Annotations_NoMarkings/*.xml \ +# --image-dir data/multi-mouse/Dataset \ +# --image-list data/multi-mouse-val-set.txt \ +# --max-embed-sep-within-instances 0.3 \ +# --min-embed-sep-between-instances 0.3 \ +# --plot-heatmap \ +# --image-out-dir temp/multimouse_2020-01-30_$(printf %02d $i) \ +# --dist-out-file output-multi-mouse/dist-out.txt +# done + +# for((i=1; i<13; i++)) +# do +# python -u tools/testmultimouseinference.py \ +# --cfg experiments/multimouse/multimouse_2020-02-03_$(printf %02d $i).yaml \ +# --model-file output-multi-mouse/multimousepose/pose_hrnet/multimouse_2020-02-03_$(printf %02d $i)/best_state.pth \ +# --cvat-files data/multi-mouse/Annotations/*.xml data/multi-mouse/Annotations_NoMarkings/*.xml \ +# --image-dir data/multi-mouse/Dataset \ +# --image-list data/multi-mouse-val-set.txt \ +# --max-embed-sep-within-instances 0.3 \ +# --min-embed-sep-between-instances 0.3 \ +# --plot-heatmap \ +# --image-out-dir temp/multimouse_2020-02-03_$(printf %02d $i) \ +# --dist-out-file output-multi-mouse/dist-out.txt +# done + +for((i=1; i<4; i++)) +do + python -u tools/testmultimouseinference.py \ + --cfg experiments/multimouse/multimouse_2020-02-10_$(printf %02d $i).yaml \ + --model-file output-multi-mouse/multimousepose/pose_hrnet/multimouse_2020-02-10_$(printf %02d $i)/best_state.pth \ + --cvat-files data/multi-mouse/Annotations/*.xml data/multi-mouse/Annotations_NoMarkings/*.xml \ + --image-dir data/multi-mouse/Dataset \ + --image-list data/multi-mouse-val-set.txt \ + --max-embed-sep-within-instances 0.3 \ + --min-embed-sep-between-instances 0.3 \ + --plot-heatmap \ + --image-out-dir temp/multimouse_2020-02-10_$(printf %02d $i) \ + --dist-out-file output-multi-mouse/dist-out.txt +done diff --git a/tools/_init_paths.py b/tools/_init_paths.py new file mode 100644 index 0000000..b1aea8f --- /dev/null +++ b/tools/_init_paths.py @@ -0,0 +1,27 @@ +# ------------------------------------------------------------------------------ +# pose.pytorch +# Copyright (c) 2018-present Microsoft +# Licensed under The Apache-2.0 License [see LICENSE for details] +# Written by Bin Xiao (Bin.Xiao@microsoft.com) +# ------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os.path as osp +import sys + + +def add_path(path): + if path not in sys.path: + sys.path.insert(0, path) + + +this_dir = osp.dirname(__file__) + +lib_path = osp.join(this_dir, '..', 'lib') +add_path(lib_path) + +mm_path = osp.join(this_dir, '..', 'lib/poseeval/py-motmetrics') +add_path(mm_path) diff --git a/tools/addpixelunits.py b/tools/addpixelunits.py new file mode 100644 index 0000000..cce2159 --- /dev/null +++ b/tools/addpixelunits.py @@ -0,0 +1,66 @@ +import argparse +import h5py +import numpy as np +import os +from pathlib import Path, WindowsPath +import yaml + +CORNERS_SUFFIX = '_corners_v2.yaml' +ARENA_SIZE_CM = 52 + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument( + '--arena-size-cm', + type=float, + default=52, + help='the arena size is used to derive cm/pixel using corners files', + ) + parser.add_argument( + 'rootdir', + help='the root directory that we parse and add unit attributes to' + ) + + args = parser.parse_args() + + for dirpath, dirnames, filenames in os.walk(args.rootdir): + for filename in filenames: + if filename.endswith(CORNERS_SUFFIX): + 
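+                # A corners file and its pose file share a filename prefix; we
+                # probe for the newest pose format first (v4, then v3, then v2)
+                # so the unit attributes land on the most recent pose estimate.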
pose_path_exists = False + for pose_version in (4, 3, 2): + pose_suffix = f'_pose_est_v{pose_version}.h5' + pose_filename = filename[:-len(CORNERS_SUFFIX)] + pose_suffix + pose_path = Path(dirpath, pose_filename) + pose_path_exists = pose_path.exists() + if pose_path_exists: + break + + if pose_path_exists: + corners_path = Path(dirpath, filename) + with open(corners_path) as corners_file: + corners_dict = yaml.safe_load(corners_file) + print(list(corners_dict.keys())) + xs = corners_dict['corner_coords']['xs'] + ys = corners_dict['corner_coords']['ys'] + + # get all of the non-diagonal pixel distances between + # corners and take the meadian + xy_ul, xy_ll, xy_ur, xy_lr = [ + np.array(xy, dtype=np.float) for xy in zip(xs, ys) + ] + med_corner_dist_px = np.median([ + np.linalg.norm(xy_ul - xy_ll), + np.linalg.norm(xy_ll - xy_lr), + np.linalg.norm(xy_lr - xy_ur), + np.linalg.norm(xy_ur - xy_ul), + ]) + + cm_per_pixel = np.float32(args.arena_size_cm / med_corner_dist_px) + with h5py.File(pose_path, 'r+') as pose_h5_file: + pose_h5_file['poseest'].attrs['cm_per_pixel'] = cm_per_pixel + pose_h5_file['poseest'].attrs['cm_per_pixel_source'] = 'corner_detection' + + +if __name__ == '__main__': + main() diff --git a/tools/extractframes.py b/tools/extractframes.py new file mode 100644 index 0000000..b0a681c --- /dev/null +++ b/tools/extractframes.py @@ -0,0 +1,108 @@ +import argparse +import csv +import imageio +import itertools +import math +import numpy as np +import os + + +# Example: +# +# share_root='/home/sheppk/smb/labshare' +# python tools/extractframes.py \ +# --root-dir "${share_root}" \ +# --videos \ +# "${share_root}"/NV5-CBAX2/2020-04-22/MDX0159_2020-04-22_23-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-22/MDX0159_2020-04-23_11-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-22/MDX0159_2020-04-23_23-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-22/MDX0159_2020-04-24_12-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-22/MDX0159_2020-04-25_00-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-22/MDX0159_2020-04-25_11-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-22/MDX0159_2020-04-26_06-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-22/MDX0159_2020-04-26_21-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-22/MDX0159_2020-04-27_11-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-22/MDX0159_2020-04-28_01-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-22/MDX0159_2020-04-28_16-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-22/MDX0159_2020-04-29_09-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-22/MDX0159_2020-04-29_23-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-22/MDX0159_2020-04-30_11-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-22/MDX0159_2020-05-01_06-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-22/MDX0159_2020-05-02_02-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-22/MDX0159_2020-05-02_18-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-22/MDX0159_2020-05-03_07-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-22/MDX0159_2020-05-03_21-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-22/MDX0159_2020-05-04_11-00-00.avi \ +# "${share_root}"/NV5-CBAX2/2020-03-26/MDX0148_2020-03-26_17-00-00.avi "${share_root}"/NV5-CBAX2/2020-03-26/MDX0148_2020-03-27_06-00-00.avi "${share_root}"/NV5-CBAX2/2020-03-26/MDX0148_2020-03-28_00-00-00.avi "${share_root}"/NV5-CBAX2/2020-03-26/MDX0148_2020-03-28_16-00-00.avi "${share_root}"/NV5-CBAX2/2020-03-26/MDX0148_2020-03-29_06-00-00.avi "${share_root}"/NV5-CBAX2/2020-03-26/MDX0148_2020-03-29_21-00-00.avi \ +# --frame-indexes 600 \ +# --outdir fecal-boli-image-batch4 + +# python tools/extractframes.py \ +# --root-dir 
"${share_root}" \ +# --videos \ +# "${share_root}"/NV5-CBAX2/2020-04-09/MDX0159_2020-04-09_22-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-09/MDX0159_2020-04-10_09-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-09/MDX0159_2020-04-11_01-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-09/MDX0159_2020-04-11_13-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-09/MDX0159_2020-04-12_01-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-09/MDX0159_2020-04-12_14-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-09/MDX0159_2020-04-13_01-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-09/MDX0159_2020-04-13_13-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-09/MDX0159_2020-04-14_00-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-09/MDX0159_2020-04-14_12-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-09/MDX0159_2020-04-14_23-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-09/MDX0159_2020-04-15_13-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-09/MDX0159_2020-04-16_01-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-09/MDX0159_2020-04-16_13-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-09/MDX0159_2020-04-17_03-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-09/MDX0159_2020-04-17_17-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-09/MDX0159_2020-04-18_11-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-09/MDX0159_2020-04-19_03-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-09/MDX0159_2020-04-19_14-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-09/MDX0159_2020-04-20_07-00-00.avi "${share_root}"/NV5-CBAX2/2020-04-09/MDX0159_2020-04-21_00-00-00.avi \ +# --frame-indexes 600 \ +# --outdir fecal-boli-image-batch4 + + +def write_frames(root_dir, net_id, frame_indexes, out_dir): + vid_fname = os.path.join(root_dir, net_id) + print('Processing:', vid_fname, 'with', len(frame_indexes), 'frames') + + net_id_root, _ = os.path.splitext(net_id) + + frame_indexes = sorted(frame_indexes) + os.makedirs(out_dir, exist_ok=True) + with imageio.get_reader(vid_fname) as reader: + for frame_index in frame_indexes: + if frame_index < 0: + print('ignoring negative frame index', frame_index) + continue + + img_data = reader.get_data(frame_index) + frame_fname = '{}_{}.png'.format( + net_id_root.replace('/', '+').replace('\\', '+'), + frame_index) + imageio.imwrite(os.path.join(out_dir, frame_fname), img_data) + + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument( + '--videos', + nargs='+', + help='the input videos', + ) + parser.add_argument( + '--frame-indexes', + type=int, + nargs='+', + help='the frame indexes to extract', + ) + parser.add_argument( + '--frame-table', + help='A tab separated file where the first column is the video NetID and all' + ' subsequent columns are zero based frame indexes to extract. 
This argument' + ' can be used instead of the videos and frame-indexes arguments.', + ) + parser.add_argument( + '--frame-table-row', + help='An optional argument to specify that just a single zero-based index row' + ' should be processed from the frame table.', + type=int, + ) + parser.add_argument( + '--root-dir', + required=True, + help='when determining video network ID this prefix root is stripped from the video name', + ) + parser.add_argument( + '--outdir', + required=True, + help='the output directory', + ) + + args = parser.parse_args() + + root_dir = os.path.normpath(args.root_dir) + + if args.videos is not None: + for vid_fname in args.videos: + net_id = os.path.relpath(os.path.normpath(vid_fname), root_dir) + write_frames(root_dir, net_id, args.frame_indexes, args.outdir) + + if args.frame_table: + with open(args.frame_table, newline='') as frame_table_file: + frame_table_reader = csv.reader(frame_table_file, delimiter='\t') + for row_index, row in enumerate(frame_table_reader): + if args.frame_table_row is None or row_index == args.frame_table_row: + if len(row) >= 2: + net_id = row[0].strip() + frame_indexes = sorted(int(x.strip()) for x in row[1:]) + write_frames(root_dir, net_id, frame_indexes, args.outdir) + + +if __name__ == "__main__": + main() diff --git a/tools/gathercvatframes.py b/tools/gathercvatframes.py new file mode 100644 index 0000000..5dcbe59 --- /dev/null +++ b/tools/gathercvatframes.py @@ -0,0 +1,114 @@ +import argparse +import imageio +import itertools +import os +import re +import time + +import _init_paths +from dataset.multimousepose import parse_poses + + +def _decompose_name(frame_filename): + m = re.match(r'(.+)_([0-9]+).png', frame_filename) + return m.group(1), int(m.group(2)) + +# Example call: +# +# python -u tools/gathercvatframes.py \ +# --cvat-xml data/multi-mouse/Annotations/*.xml data/multi-mouse/Annotations_NoMarkings/*.xml \ +# --outdir data/multi-mouse/Dataset \ +# --root-dir '/run/user/1002/gvfs/smb-share:server=bht2stor.jax.org,share=vkumar' \ +# --include-neighbor-frames \ +# --vid-path-str-replace NV6-B2B NV6-CBAX2 +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument( + '--cvat-xml', + required=True, + nargs='+', + help='CVAT XML files that we gather frames for', + ) + parser.add_argument( + '--root-dir', + required=True, + help='the root directory location where video files are organized according to "network ID"' + ) + parser.add_argument( + '--include-neighbor-frames', + action='store_true', + help='gather neighboring frames too (ie for frame n we also save frame n-1 and n+1)' + ) + parser.add_argument( + '--outdir', + required=True, + help='the output directory', + ) + parser.add_argument( + '--vid-path-str-replace', + nargs='+', + default=[], + help='find and replace string pairs (so this should have an even number of strings)', + ) + + args = parser.parse_args() + + all_pose_labels = itertools.chain.from_iterable(parse_poses(xml) for xml in args.cvat_xml) + all_filenames = {lbl['image_name'] for lbl in all_pose_labels} + + assert len(args.vid_path_str_replace) % 2 == 0 + + if args.include_neighbor_frames: + for fname in set(all_filenames): + vid_fragment, frame_index = _decompose_name(fname) + if frame_index > 0: + all_filenames.add('{}_{}.png'.format(vid_fragment, frame_index - 1)) + all_filenames.add('{}_{}.png'.format(vid_fragment, frame_index + 1)) + + all_filenames = sorted(all_filenames, key=_decompose_name) + + for vid_frag, name_grp in itertools.groupby(all_filenames, key=lambda f: 
_decompose_name(f)[0]): + missing_fnames = [] + for fname in name_grp: + cache_file = os.path.join(args.outdir, fname) + if os.path.exists(cache_file): + print('EXISTS: ', cache_file) + else: + print('MISSING:', cache_file) + missing_fnames.append(fname) + + if missing_fnames: + network_filename = vid_frag.replace('+', '/') + for i in range(len(args.vid_path_str_replace) // 2): + find_str = args.vid_path_str_replace[i * 2] + replace_str = args.vid_path_str_replace[i * 2 + 1] + network_filename = network_filename.replace(find_str, replace_str) + + vid_fname = os.path.join(args.root_dir, network_filename) + + print("OPENING") + try: + with imageio.get_reader(vid_fname) as reader: + print("OPENED") + for fname in missing_fnames: + _, frame_index = _decompose_name(fname) + try: + print("GETTING") + img_data = reader.get_data(frame_index) + print("GOT") + imageio.imwrite(os.path.join(args.outdir, fname), img_data) + print("WRITTEN") + + # sadly this sleep is needed to prevent ffmpeg from hanging + time.sleep(1) + + except IndexError: + print('FAILED TO READ FRAME', frame_index, 'FROM', vid_fname) + except: + print('Failed to read video:', vid_fname) + + + +if __name__ == "__main__": + main() diff --git a/tools/infercorners.py b/tools/infercorners.py new file mode 100644 index 0000000..2afa2d9 --- /dev/null +++ b/tools/infercorners.py @@ -0,0 +1,270 @@ +import argparse +import h5py +import imageio +import numpy as np +import time +import yaml + +import torch +import torch.nn.parallel +import torch.nn.functional as torchfunc +import torch.multiprocessing as mp +import torch.backends.cudnn as cudnn +import torch.optim +import torch.utils.data +import torch.utils.data.distributed +import torchvision.transforms as transforms + +import _init_paths +from config import cfg +from config import update_config + +import models +import cv2 +import string +import os.path + +import skimage.draw +import skimage.io + +FRAMES_PER_MINUTE = 30 * 60 + + +# Example use: +# +# time python -u tools/infercorners.py \ +# --model-file output-full-mouse-pose/hdf5mousepose/pose_hrnet/corner-detection/model_best.pth \ +# --cfg corner-detection.yaml \ +# --root-dir ~/smb/labshare \ +# --batch-file netfiles.csv +# +# time python -u tools/infercorners.py \ +# --model-file output-corner/simplepoint/pose_hrnet/corner_2020-06-30_01/best_state.pth \ +# --cfg experiments/corner/corner_2020-06-30_01.yaml \ +# --root-dir ~/smb/labshare \ +# --batch-file /home/sheppk/projects/massimo-deep-hres-net/netfiles.csv +# +# time python -u tools/infercorners.py \ +# --model-file output-corner/simplepoint/pose_hrnet/corner_2020-06-30_01/best_state.pth \ +# --cfg experiments/corner/corner_2020-06-30_01.yaml \ +# --root-dir ~/smb/labshare \ +# --batch-file data/corner/leinani-corner-batch-2020-08-20.txt +# +# share_root='/run/user/1000/gvfs/smb-share:server=bht2stor.jax.org,share=vkumar' +# time python -u tools/infercorners.py \ +# --model-file output-corner/simplepoint/pose_hrnet/corner_2020-06-30_01/best_state.pth \ +# --cfg experiments/corner/corner_2020-06-30_01.yaml \ +# --root-dir "${share_root}" \ +# --batch-file ~/projects/gaitanalysis/data/metadata/strain-survey-b6j-bjnj-only-batch-2021-01-18.txt + +def argmax_2d(tensor): + + assert tensor.dim() >= 2 + max_col_vals, max_cols = torch.max(tensor, -1, keepdim=True) + max_vals, max_rows = torch.max(max_col_vals, -2, keepdim=True) + max_cols = torch.gather(max_cols, -2, max_rows) + + max_vals = max_vals.squeeze(-1).squeeze(-1) + max_rows = max_rows.squeeze(-1).squeeze(-1) + max_cols = 
max_cols.squeeze(-1).squeeze(-1) + + return max_vals, torch.stack([max_rows, max_cols], -1) + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument( + '--cfg', + required=True, + help='the configuration for the model to use for inference', + ) + parser.add_argument( + '--model-file', + required=True, + help='the model file to use for inference', + ) + parser.add_argument( + '--batch-file', + required=False, + help='the batch file listing videos to process', + ) + parser.add_argument( + '--root-dir', + required=False, + help='the root directory that batch file paths are build off of' + ) + parser.add_argument( + '--videos', + required=False, + nargs='+', + help='specify video paths on the command line as an alternative' + ' to using the "--batch-file" and "--root-dir" arguments', + ) + + + args = parser.parse_args() + cfg.defrost() + cfg.merge_from_file(args.cfg) + if args.model_file: + cfg.TEST.MODEL_FILE = args.model_file + cfg.freeze() + + start_time = time.time() + + # cudnn related setting + cudnn.benchmark = cfg.CUDNN.BENCHMARK + torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC + torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED + + model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')( + cfg, is_train=False + ) + print('=> loading model from {}'.format(cfg.TEST.MODEL_FILE)) + model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False) + model.eval() + model = model.cuda() + + xform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize( + mean=[0.45, 0.45, 0.45], + std=[0.225, 0.225, 0.225], + ), + ]) + + video_filenames = [] + if args.batch_file: + with open(args.batch_file) as batch_file: + for line in batch_file: + vid_filename = line.strip() + if vid_filename: + video_filename = os.path.join(args.root_dir, vid_filename) + video_filenames.append(video_filename) + + if args.videos: + for video_filename in args.videos: + video_filenames.append(video_filename) + + with torch.no_grad(): + for video_filename in video_filenames: + if not os.path.isfile(video_filename): + print(f'ERROR: {video_filename} is not a valid file') + continue + + with imageio.get_reader(video_filename) as reader: + + all_preds = [] + all_maxvals = [] + batch = [] + + def perform_inference(): + if batch: + batch_tensor = torch.stack([xform(img) for img in batch]).cuda() + batch.clear() + + x = model(batch_tensor) + + x.squeeze_(-3) + + img_h = batch_tensor.size(-2) + img_w = batch_tensor.size(-1) + + x_ul = x[:, :(img_h // 2), :(img_w // 2)] + x_ll = x[:, (img_h // 2):, :(img_w // 2)] + x_ur = x[:, :(img_h // 2), (img_w // 2):] + x_lr = x[:, (img_h // 2):, (img_w // 2):] + + maxvals_ul, preds_ul = argmax_2d(x_ul) + maxvals_ll, preds_ll = argmax_2d(x_ll) + maxvals_ur, preds_ur = argmax_2d(x_ur) + maxvals_lr, preds_lr = argmax_2d(x_lr) + + preds_ul = preds_ul.cpu().numpy().astype(np.uint16) + preds_ll = preds_ll.cpu().numpy().astype(np.uint16) + preds_ur = preds_ur.cpu().numpy().astype(np.uint16) + preds_lr = preds_lr.cpu().numpy().astype(np.uint16) + + preds_ll[..., 0] += img_h // 2 + preds_ur[..., 1] += img_w // 2 + preds_lr[..., 0] += img_h // 2 + preds_lr[..., 1] += img_w // 2 + + pred_stack = np.stack( + [preds_ul, preds_ll, preds_ur, preds_lr], + axis=-2, + ) + + all_preds.append(pred_stack) + + last_frame_index = 600 + frame_step_size = 100 + for frame_index, image in enumerate(reader): + + if frame_index == 0: + mockup = image + + if frame_index % frame_step_size == 0: + + batch.append(image) + perform_inference() + + if frame_index == 
last_frame_index:
+                        break
+
+                all_preds = np.concatenate(all_preds)
+
+                xmed_ul = []
+                xmed_ll = []
+                xmed_ur = []
+                xmed_lr = []
+
+                ymed_ul = []
+                ymed_ll = []
+                ymed_ur = []
+                ymed_lr = []
+
+                # collect the predicted corner coordinates from every sampled
+                # frame (all_preds has shape [frame, corner, row/col])
+                for i in range(len(all_preds)):
+                    xmed_ul.append(all_preds[i, 0, 1])
+                    xmed_ll.append(all_preds[i, 1, 1])
+                    xmed_ur.append(all_preds[i, 2, 1])
+                    xmed_lr.append(all_preds[i, 3, 1])
+
+                    ymed_ul.append(all_preds[i, 0, 0])
+                    ymed_ll.append(all_preds[i, 1, 0])
+                    ymed_ur.append(all_preds[i, 2, 0])
+                    ymed_lr.append(all_preds[i, 3, 0])
+
+                xs = [
+                    int(np.median(xmed_ul)),
+                    int(np.median(xmed_ll)),
+                    int(np.median(xmed_ur)),
+                    int(np.median(xmed_lr)),
+                ]
+                ys = [
+                    int(np.median(ymed_ul)),
+                    int(np.median(ymed_ll)),
+                    int(np.median(ymed_ur)),
+                    int(np.median(ymed_lr)),
+                ]
+                out_doc = {
+                    'corner_coords': {
+                        'xs': xs,
+                        'ys': ys,
+                    }
+                }
+
+                video_filename_root, _ = os.path.splitext(video_filename)
+                video_yaml_out_filename = video_filename_root + '_corners_v2.yaml'
+                print('Writing to:', video_yaml_out_filename)
+                with open(video_yaml_out_filename, 'w') as video_yaml_out_file:
+                    yaml.safe_dump(out_doc, video_yaml_out_file)
+
+                video_png_out_filename = video_filename_root + '_corners_v2.png'
+                for i in range(4):
+                    rr, cc = skimage.draw.circle(ys[i], xs[i], 5, mockup.shape)
+                    skimage.draw.set_color(mockup, (rr, cc), [255, 0, 0])
+                skimage.io.imsave(video_png_out_filename, mockup)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/inferfecalbolicount.py b/tools/inferfecalbolicount.py
new file mode 100644
index 0000000..7164183
--- /dev/null
+++ b/tools/inferfecalbolicount.py
@@ -0,0 +1,245 @@
+import matplotlib
+matplotlib.use("Agg")
+
+import argparse
+import imageio
+import itertools
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import skimage.transform
+
+import torch
+import torch.nn.functional as torchfunc
+import torch.backends.cudnn as cudnn
+import torchvision.transforms as transforms
+
+import _init_paths
+import utils.assocembedutil as aeutil
+from config import cfg
+from config import update_config
+
+import models
+
+
+FRAMES_PER_MINUTE = 30 * 60
+
+# Examples:
+#
+# python -u tools/inferfecalbolicount.py \
+#     --min-heatmap-val 1.5 \
+#     output-fecal-boli/fecalboli/pose_hrnet/fecalboli_2020-05-0-08/best_state.pth \
+#     experiments/fecalboli/fecalboli_2020-05-0-08.yaml \
+#     one-min-clip-4.avi
+
+# python -u tools/inferfecalbolicount.py \
+#     --min-heatmap-val 0.75 \
+#     --image-out-dir temp/fbinf \
+#     output-fecal-boli/fecalboli/pose_hrnet/fecalboli_2020-06-19_02/best_state.pth \
+#     experiments/fecalboli/fecalboli_2020-06-19_02.yaml \
+#     one-min-clip-4.avi

+# for i in `ls poseintervals-temp/*.avi`
+# do
+#     echo "PROCESSING ${i}"
+#     python -u tools/inferfecalbolicount.py \
+#         --min-heatmap-val 0.75 \
+#         --image-out-dir "poseintervals-temp" \
+#         output-fecal-boli/fecalboli/pose_hrnet/fecalboli_2020-06-19_02/best_state.pth \
+#         experiments/fecalboli/fecalboli_2020-06-19_02.yaml \
+#         "${i}"
+# done

+def infer_fecal_boli_xy(
+        model,
+        frames,
+        min_heatmap_val,
+        image_out_dir=None,
+        image_name_prefix='fb-inf-'):
+
+    xform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize(
+            mean=[0.45, 0.45, 0.45],
+            std=[0.225, 0.225, 0.225],
+        ),
+    ])
+
+    with torch.no_grad():
+
+        batch = []
+        batch_images = []
+        cuda_heatmap = None
+        cuda_localmax = None
+        image_save_counter = 0
+
+        def sync_cuda_preds():
+            nonlocal cuda_heatmap
+            nonlocal cuda_localmax
+            nonlocal image_save_counter
+
+            batch_fecal_boli_xy = []
+
+            if
cuda_heatmap is not None: + # calculate fecal boli XY and add them to batch_fecal_boli_xy list + curr_batch_size = cuda_heatmap.size(0) + for batch_frame_index in range(curr_batch_size): + + frame_cuda_localmax = cuda_localmax[batch_frame_index, 0, ...] + frame_fecal_boli_xy = frame_cuda_localmax.nonzero().cpu() + frame_fecal_boli_xy[...] = frame_fecal_boli_xy[..., [1, 0]].clone() + + batch_fecal_boli_xy.append(frame_fecal_boli_xy) + + if image_out_dir is not None: + os.makedirs(image_out_dir, exist_ok=True) + + data_numpy = batch_images.pop(0) + + curr_heatmap = cuda_heatmap[batch_frame_index, 0, ...].cpu().numpy() + plt.figure(figsize=(12, 12)) + plt.imshow(curr_heatmap, cmap=plt.get_cmap('YlOrRd')) + plt.savefig(os.path.join(image_out_dir, image_name_prefix + 'heat-' + str(image_save_counter) + '.png')) + + curr_heatmap = np.ma.masked_where(curr_heatmap <= min_heatmap_val, curr_heatmap) + fig = plt.figure(figsize=(24, 12)) + ax = fig.gca() + plt.imshow(np.concatenate((data_numpy, data_numpy), axis=1), cmap='gray', vmin=0, vmax=255) + plt.imshow(curr_heatmap, cmap=plt.get_cmap('YlOrRd'), alpha=0.4) + + image_width = data_numpy.shape[1] + for curr_xy in frame_fecal_boli_xy + torch.tensor([image_width, 0]): + ax.add_artist(plt.Circle(curr_xy, 10, color='r', fill=False)) + plt.imshow(np.concatenate((data_numpy, data_numpy), axis=1), cmap='gray', vmin=0, vmax=255, alpha=0.0) + plt.axis('off') + plt.tight_layout() + plt.savefig(os.path.join(image_out_dir, image_name_prefix + str(image_save_counter) + '.png')) + image_save_counter += 1 + del batch_images[:cfg.TEST.BATCH_SIZE_PER_GPU] + + cuda_heatmap = None + cuda_localmax = None + + return batch_fecal_boli_xy + + def perform_inference(): + nonlocal cuda_heatmap + nonlocal cuda_localmax + + prev_batch_fecal_boli_xy = None + + if batch: + batch_tensor = torch.stack(batch[:cfg.TEST.BATCH_SIZE_PER_GPU]) + del batch[:cfg.TEST.BATCH_SIZE_PER_GPU] + batch_tensor = batch_tensor.cuda(non_blocking=True) + + prev_batch_fecal_boli_xy = sync_cuda_preds() + + model_out = model(batch_tensor) + + cuda_heatmap = model_out + cuda_localmax = aeutil.localmax2D(cuda_heatmap, min_heatmap_val, 5) + else: + prev_batch_fecal_boli_xy = sync_cuda_preds() + + return prev_batch_fecal_boli_xy + + for frame_index, image in enumerate(frames): + + if image_out_dir is not None: + batch_images.append(image) + + image = xform(image) + + prev_batch_fecal_boli_xy = [] + batch.append(image) + if len(batch) == cfg.TEST.BATCH_SIZE_PER_GPU: + prev_batch_fecal_boli_xy = perform_inference() + + for frame_fecal_boli_xy in prev_batch_fecal_boli_xy: + yield frame_fecal_boli_xy + + # Drain any remaining batchs. It should require at most two calls. 
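+        # (Two calls suffice because inference is pipelined one batch deep:
+        # the first call flushes whatever frames are still queued in `batch`,
+        # and the second call syncs the CUDA predictions left by that flush.)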
+ for _ in range(2): + prev_batch_fecal_boli_xy = perform_inference() + for frame_fecal_boli_xy in prev_batch_fecal_boli_xy: + yield frame_fecal_boli_xy + + +def infer_fecal_boli_counts(model, frames, min_heatmap_val, image_out_dir=None, image_name_prefix='fb-inf-'): + fecal_boli_xys = infer_fecal_boli_xy(model, frames, min_heatmap_val, image_out_dir, image_name_prefix) + for fecal_boli_xy in fecal_boli_xys: + yield fecal_boli_xy.size(0) + + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument( + 'model_file', + help='the model file to use for inference', + ) + parser.add_argument( + 'cfg', + help='the configuration for the model to use for inference', + ) + parser.add_argument( + 'video', + help='the input video', + ) + parser.add_argument( + '--image-out-dir', + type=str, + ) + parser.add_argument( + '--min-heatmap-val', + type=float, + default=0.75, + ) + parser.add_argument( + '--sample-interval-min', + type=int, + default=1, + help='what sampling interval should we use for frames', + ) + + args = parser.parse_args() + sample_intervals_frames = args.sample_interval_min * 60 * 30 + + cfg.defrost() + cfg.merge_from_file(args.cfg) + cfg.TEST.MODEL_FILE = args.model_file + cfg.freeze() + + # cudnn related setting + cudnn.benchmark = cfg.CUDNN.BENCHMARK + torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC + torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED + + model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')( + cfg, is_train=False + ) + print('=> loading model from {}'.format(cfg.TEST.MODEL_FILE)) + model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False) + model.eval() + model = model.cuda() + + with imageio.get_reader(args.video) as frame_reader: + + out_prefex, _ = os.path.splitext(os.path.basename(args.video)) + out_prefex += '_fecal_boli_' + + frame_reader = itertools.islice(frame_reader, 0, None, sample_intervals_frames) + fecal_boli_counts = infer_fecal_boli_counts( + model, + frame_reader, + args.min_heatmap_val, + args.image_out_dir, + out_prefex) + for counts in fecal_boli_counts: + print('counts:', counts) + + +if __name__ == "__main__": + main() diff --git a/tools/inferfecalbolicountbatch.py b/tools/inferfecalbolicountbatch.py new file mode 100644 index 0000000..ef90c81 --- /dev/null +++ b/tools/inferfecalbolicountbatch.py @@ -0,0 +1,131 @@ +import argparse +import imageio +import itertools +import numpy as np +import os +import yaml + +import torch +import torch.backends.cudnn as cudnn + +import _init_paths +from config import cfg +from config import update_config + +from inferfecalbolicount import infer_fecal_boli_counts + +import models + +# Examples: +# +# python -u tools/inferfecalbolicountbatch.py \ +# --root-dir '/media/sheppk/TOSHIBA EXT/cached-data/BTBR_3M_stranger_4day' \ +# --batch-file '/media/sheppk/TOSHIBA EXT/cached-data/BTBR_3M_stranger_4day/BTBR_3M_stranger_4day-batch-temp.txt' \ +# output-fecal-boli/fecalboli/pose_hrnet/fecalboli_2020-06-19_02/best_state.pth \ +# experiments/fecalboli/fecalboli_2020-06-19_02.yaml +# +# python -u tools/inferfecalbolicountbatch.py \ +# --root-dir '/home/sheppk/smb/labshare' \ +# --batch-file 'data/fecal-boli/Tom-CBAX2B-OFA_batch.txt' \ +# output-fecal-boli/fecalboli/pose_hrnet/fecalboli_2020-06-19_02/best_state.pth \ +# experiments/fecalboli/fecalboli_2020-06-19_02.yaml + +# Strain Survey: +# python -u tools/inferfecalbolicountbatch.py \ +# --allow-missing-video \ +# --root-dir '/home/sheppk/smb/labshare' \ +# --batch-file 'data/fecal-boli/strain-survey-batch-2019-05-29.txt' \ 
+# output-fecal-boli/fecalboli/pose_hrnet/fecalboli_2020-06-19_02/best_state.pth \ +# experiments/fecalboli/fecalboli_2020-06-19_02.yaml + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument( + 'model_file', + help='the model file to use for inference', + ) + parser.add_argument( + 'cfg', + help='the configuration for the model to use for inference', + ) + parser.add_argument( + '--batch-file', + help='path to the file that is a new-line separated' + ' list of all videos to process', + required=True, + ) + parser.add_argument( + '--root-dir', + help='the root directory. All paths given in the batch files are relative to this root', + required=True, + ) + parser.add_argument( + '--min-heatmap-val', + type=float, + default=0.75, + ) + parser.add_argument( + '--sample-interval-min', + type=int, + default=1, + help='what sampling interval should we use for frames', + ) + parser.add_argument( + '--allow-missing-video', + help='allow missing videos with warning', + action='store_true', + ) + + args = parser.parse_args() + sample_intervals_frames = args.sample_interval_min * 60 * 30 + + cfg.defrost() + cfg.merge_from_file(args.cfg) + cfg.TEST.MODEL_FILE = args.model_file + cfg.freeze() + + # cudnn related setting + cudnn.benchmark = cfg.CUDNN.BENCHMARK + torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC + torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED + + model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')( + cfg, is_train=False + ) + print('=> loading model from {}'.format(cfg.TEST.MODEL_FILE)) + model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False) + model.eval() + model = model.cuda() + + with open(args.batch_file) as batch_file: + for line in batch_file: + vid_filename = line.strip() + if vid_filename: + vid_path = os.path.join(args.root_dir, vid_filename) + vid_path_root, _ = os.path.splitext(vid_path) + vid_fb_count_path = vid_path_root + '_fecal_boli_counts.yaml' + print(vid_fb_count_path) + + if args.allow_missing_video: + if not os.path.exists(vid_path): + print('WARNING: ' + vid_path + ' does not exist') + continue + else: + assert os.path.exists(vid_path), vid_path + ' does not exist' + + with imageio.get_reader(vid_path) as frame_reader: + + frame_reader = itertools.islice(frame_reader, 0, None, sample_intervals_frames) + fecal_boli_counts = infer_fecal_boli_counts(model, frame_reader, args.min_heatmap_val) + fecal_boli_counts = list(fecal_boli_counts) + out_doc = { + 'sample_interval_min': args.sample_interval_min, + 'fecal_boli_counts': fecal_boli_counts, + } + with open(vid_fb_count_path, 'w') as video_yaml_out_file: + yaml.safe_dump(out_doc, video_yaml_out_file) + + +if __name__ == "__main__": + main() diff --git a/tools/infermousepose.py b/tools/infermousepose.py new file mode 100644 index 0000000..d32e0d4 --- /dev/null +++ b/tools/infermousepose.py @@ -0,0 +1,153 @@ +import argparse +import h5py +import imageio +import numpy as np +import time + +import torch +import torch.nn.functional as torchfunc +import torch.backends.cudnn as cudnn +import torchvision.transforms as transforms + +import _init_paths +from config import cfg +from config import update_config + +import models + + +FRAMES_PER_MINUTE = 30 * 60 + + +def argmax_2d(tensor): + assert tensor.dim() >= 2 + max_col_vals, max_cols = torch.max(tensor, -1, keepdim=True) + max_vals, max_rows = torch.max(max_col_vals, -2, keepdim=True) + max_cols = torch.gather(max_cols, -2, max_rows) + + max_vals = max_vals.squeeze(-1).squeeze(-1) + max_rows = 
max_rows.squeeze(-1).squeeze(-1) + max_cols = max_cols.squeeze(-1).squeeze(-1) + + return max_vals, torch.stack([max_rows, max_cols], -1) + + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument( + '--model-file', + help='the model file to use for inference', + default=None, + ) + + parser.add_argument( + 'cfg', + help='the configuration for the model to use for inference', + ) + + parser.add_argument( + 'video', + help='the input video', + ) + + parser.add_argument( + 'poseout', + help='the pose estimation output HDF5 file', + ) + + args = parser.parse_args() + cfg.defrost() + cfg.merge_from_file(args.cfg) + if args.model_file: + cfg.TEST.MODEL_FILE = args.model_file + cfg.freeze() + + start_time = time.time() + + # cudnn related setting + cudnn.benchmark = cfg.CUDNN.BENCHMARK + torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC + torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED + + model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')( + cfg, is_train=False + ) + print('=> loading model from {}'.format(cfg.TEST.MODEL_FILE)) + model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False) + model.eval() + model = model.cuda() + + xform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize( + mean=[0.45, 0.45, 0.45], + std=[0.225, 0.225, 0.225], + ), + ]) + + with torch.no_grad(), imageio.get_reader(args.video) as reader: + + all_preds = [] + all_maxvals = [] + batch = [] + + cuda_preds = None + cuda_maxval = None + + def sync_cuda_preds(): + nonlocal cuda_preds + nonlocal cuda_maxval + + if cuda_preds is not None: + all_maxvals.append(cuda_maxval.cpu().numpy()) + all_preds.append(cuda_preds.cpu().numpy().astype(np.uint16)) + cuda_maxval = None + cuda_preds = None + + def perform_inference(): + nonlocal cuda_preds + nonlocal cuda_maxval + + if batch: + batch_tensor = torch.stack([xform(img) for img in batch]).cuda() + batch.clear() + + sync_cuda_preds() + + inf_out = model(batch_tensor) + in_out_ratio = batch_tensor.size(-1) // inf_out.size(-1) + if in_out_ratio == 4: + inf_out = torchfunc.upsample(inf_out, scale_factor=4, mode='bicubic', align_corners=False) + + maxvals, preds = argmax_2d(inf_out) + cuda_maxval = maxvals + cuda_preds = preds + + for frame_index, image in enumerate(reader): + + if frame_index != 0 and frame_index % FRAMES_PER_MINUTE == 0: + curr_time = time.time() + cum_time_elapsed = curr_time - start_time + print('processed {:.1f} min of video in {:.1f} min'.format( + frame_index / FRAMES_PER_MINUTE, + cum_time_elapsed / 60, + )) + + batch.append(image) + if len(batch) == cfg.TEST.BATCH_SIZE_PER_GPU: + perform_inference() + + perform_inference() + sync_cuda_preds() + + all_preds = np.concatenate(all_preds) + all_maxvals = np.concatenate(all_maxvals) + + with h5py.File(args.poseout, 'w') as h5file: + h5file['poseest/points'] = all_preds + h5file['poseest/confidence'] = all_maxvals + + +if __name__ == "__main__": + main() diff --git a/tools/infermultimousepose.py b/tools/infermultimousepose.py new file mode 100644 index 0000000..010f5a0 --- /dev/null +++ b/tools/infermultimousepose.py @@ -0,0 +1,528 @@ +import argparse +import h5py +import imageio +import numpy as np +import skimage.transform +import time + +import torch +import torch.nn.functional as torchfunc +import torch.backends.cudnn as cudnn +import torchvision.transforms as transforms + +import _init_paths +import utils.assocembedutil as aeutil +from config import cfg +from config import update_config + +import models + + +FRAMES_PER_MINUTE = 30 * 60 + +# 
Examples: +# +# python -u tools/infermultimousepose.py \ +# --max-instance-count 3 \ +# ./output-multi-mouse/multimousepose/pose_hrnet/multimouse_2019-11-19_1/best_state.pth \ +# ./experiments/multimouse/multimouse_2019-11-19_1.yaml \ +# one-min-clip-800x800.avi \ +# one-min-clip-800x800_2019-11-19_2.h5 +# +# python -u tools/infermultimousepose.py \ +# --max-instance-count 4 \ +# ./output-multi-mouse/multimousepose/pose_hrnet/multimouse_2019-11-19_1/best_state.pth \ +# ./experiments/multimouse/multimouse_2019-11-19_1.yaml \ +# one-min-clip-5.avi \ +# one-min-clip-5-2.h5 +# +# python -u tools/infermultimousepose.py \ +# --max-instance-count 4 \ +# --max-embed-sep-within-instances 0.3 \ +# --min-embed-sep-between-instances 0.2 \ +# --min-pose-heatmap-val 1.5 \ +# ./output-multi-mouse/multimousepose/pose_hrnet/multimouse_2020-02-03_06/best_state.pth \ +# ./experiments/multimouse/multimouse_2020-02-03_06.yaml \ +# one-min-clip-4.avi \ +# one-min-clip-4-2020-02-03_06-WEIGHTED_EMBED-HIGHER_PROB.h5 + +def infer_pose_instances( + model, frames, + use_neighboring_frames, + min_embed_sep, max_embed_sep, max_inst_dist, + min_joint_count, max_instance_count, max_pose_dist_px, + min_pose_heatmap_val): + + def infer_pose_instances_no_track_id(): + + xform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize( + mean=[0.45, 0.45, 0.45], + std=[0.225, 0.225, 0.225], + ), + ]) + + with torch.no_grad(): + + start_time = time.time() + + # Build up a list of lists containing PoseInstance objects. The elements + # in pose_instances correspond to video frames and the indices of the + # nested lists correspond to instances detected within the respective frame. + # pose_instances = [] + + batch = [] + cuda_pose_heatmap = None + cuda_pose_localmax = None + cuda_pose_embed_map = None + + def sync_cuda_preds(): + nonlocal cuda_pose_heatmap + nonlocal cuda_pose_localmax + nonlocal cuda_pose_embed_map + + batch_pose_instances = [] + + if cuda_pose_heatmap is not None: + # calculate pose instances and add them to pose_instances list + curr_batch_size = cuda_pose_heatmap.size(0) + for batch_frame_index in range(curr_batch_size): + + # pylint: disable=unsubscriptable-object + frame_pose_instances = aeutil.calc_pose_instances( + cuda_pose_heatmap[batch_frame_index, ...], + cuda_pose_localmax[batch_frame_index, ...], + cuda_pose_embed_map[batch_frame_index, ...], + min_embed_sep, + max_embed_sep, + max_inst_dist) + + # remove poses that have too few joints + if min_joint_count is not None: + frame_pose_instances = [ + pi for pi in frame_pose_instances + if len(pi.keypoints) >= min_joint_count + ] + + # if we have too many poses remove in order of lowest confidence + if (max_instance_count is not None + and len(frame_pose_instances) > max_instance_count): + frame_pose_instances.sort(key=lambda pi: pi.mean_inst_conf) + del frame_pose_instances[max_instance_count:] + + batch_pose_instances.append(frame_pose_instances) + + cuda_pose_heatmap = None + cuda_pose_localmax = None + cuda_pose_embed_map = None + + return batch_pose_instances + + def perform_inference(): + nonlocal cuda_pose_heatmap + nonlocal cuda_pose_localmax + nonlocal cuda_pose_embed_map + + prev_batch_pose_instances = None + + if batch: + batch_tensor = torch.stack(batch[:cfg.TEST.BATCH_SIZE_PER_GPU]) + del batch[:cfg.TEST.BATCH_SIZE_PER_GPU] + batch_tensor = batch_tensor.cuda(non_blocking=True) + + prev_batch_pose_instances = sync_cuda_preds() + + model_out = model(batch_tensor) + + joint_count = model_out.size(1) // 2 + cuda_pose_heatmap = 
model_out[:, :joint_count, ...]
+                    cuda_pose_localmax = aeutil.localmax2D(cuda_pose_heatmap, min_pose_heatmap_val, 3)
+                    cuda_pose_embed_map = model_out[:, joint_count:, ...]
+                else:
+                    prev_batch_pose_instances = sync_cuda_preds()
+
+                return prev_batch_pose_instances
+
+            for frame_index, image in enumerate(frames):
+
+                if frame_index != 0 and frame_index % (FRAMES_PER_MINUTE // 4) == 0:
+                    curr_time = time.time()
+                    cum_time_elapsed = curr_time - start_time
+                    print('processed {:.2f} min of video in {:.2f} min'.format(
+                        frame_index / FRAMES_PER_MINUTE,
+                        cum_time_elapsed / 60,
+                    ))
+
+                image = xform(image)
+
+                prev_batch_pose_instances = []
+                if use_neighboring_frames:
+                    # pack (previous, current, next) grayscale frames into the
+                    # three input channels
+                    if len(batch) >= 1:
+                        image[0, ...] = batch[-1][1, ...]
+                        batch[-1][2, ...] = image[1, ...]
+                    # append after wiring the neighbor channels; the extra (+ 1)
+                    # frame stays queued until its next-frame channel is filled
+                    batch.append(image)
+                    if len(batch) == cfg.TEST.BATCH_SIZE_PER_GPU + 1:
+                        prev_batch_pose_instances = perform_inference()
+                else:
+                    batch.append(image)
+                    if len(batch) == cfg.TEST.BATCH_SIZE_PER_GPU:
+                        prev_batch_pose_instances = perform_inference()
+
+                for frame_pose_instances in prev_batch_pose_instances:
+                    yield frame_pose_instances
+
+            # In this while loop we drain any remaining batches. It should iterate
+            # at most two times.
+            prev_batch_pose_instances = perform_inference()
+            while prev_batch_pose_instances:
+                for frame_pose_instances in prev_batch_pose_instances:
+                    yield frame_pose_instances
+                prev_batch_pose_instances = perform_inference()
+
+    return apply_track_id_to_poses(max_pose_dist_px, infer_pose_instances_no_track_id())
+
+
+def apply_track_id_to_poses(max_pose_dist_px, pose_instances):
+    # we now have a collection of pose instances for every frame. We can try
+    # to join them together into tracks based on pose distance.
+    track_id_counter = 0
+    prev_pose_instances = []
+    for curr_pose_instances in pose_instances:
+        pose_combos = []
+        for prev_pose_i, prev_pose in enumerate(prev_pose_instances):
+            for curr_pose_i, curr_pose in enumerate(curr_pose_instances):
+                curr_dist = aeutil.pose_distance(curr_pose, prev_pose)
+                if curr_dist <= max_pose_dist_px:
+                    pose_combos.append((prev_pose_i, curr_pose_i, curr_dist))
+
+        # sort pose combinations by distance and match greedily, closest pair first
+        pose_combos.sort(key=lambda pcombo: pcombo[2])
+
+        unmatched_prev_poses = set(range(len(prev_pose_instances)))
+        unmatched_curr_poses = set(range(len(curr_pose_instances)))
+        for prev_pose_i, curr_pose_i, curr_dist in pose_combos:
+            if prev_pose_i in unmatched_prev_poses and curr_pose_i in unmatched_curr_poses:
+                prev_pose = prev_pose_instances[prev_pose_i]
+                curr_pose = curr_pose_instances[curr_pose_i]
+                curr_pose.instance_track_id = prev_pose.instance_track_id
+
+                unmatched_prev_poses.remove(prev_pose_i)
+                unmatched_curr_poses.remove(curr_pose_i)
+
+        # any pose that failed to match a previous-frame pose starts a new track
+        for unmatched_pose_i in unmatched_curr_poses:
+            curr_pose = curr_pose_instances[unmatched_pose_i]
+            curr_pose.instance_track_id = track_id_counter
+            track_id_counter += 1
+
+        prev_pose_instances = curr_pose_instances
+
+        yield curr_pose_instances
+
+
+# def resize_frames(frames, height, width):
+#     for frame in frames:
+#         print('BEFORE dtype, shape, min, max:', frame.dtype, frame.shape, frame.min(), frame.max())
+#         frame = skimage.transform.resize(frame, (height, width))
+#         frame = np.round(frame * 255).astype(np.uint8)
+#
+#         print('AFTER dtype, shape, min, max:', frame.dtype, frame.shape, frame.min(), frame.max())
+#
+#         yield frame
+
+
+def find_same_track_pose(pose, pose_list):
+    for curr_pose in pose_list:
+        if curr_pose.instance_track_id == pose.instance_track_id:
+            return curr_pose
+
+    return None
+
+
+def smooth_poses(pose_instances):
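+    # Three-frame moving average applied in place: a keypoint is smoothed only
+    # when the same track has that keypoint in the previous, current, and next
+    # frames, so track endpoints and detection gaps are left untouched.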
+    frame_count = len(pose_instances)
+    for frame_index, curr_frame_pose_instances in enumerate(pose_instances):
+
+        prev_frame_pose_instances = []
+        if frame_index > 0:
+            prev_frame_pose_instances = pose_instances[frame_index - 1]
+
+        next_frame_pose_instances = []
+        if frame_index < frame_count - 1:
+            next_frame_pose_instances = pose_instances[frame_index + 1]
+
+        for curr_pose_track_instance in curr_frame_pose_instances:
+            prev_pose_track_instance = find_same_track_pose(
+                curr_pose_track_instance,
+                prev_frame_pose_instances)
+            next_pose_track_instance = find_same_track_pose(
+                curr_pose_track_instance,
+                next_frame_pose_instances)
+
+            # we only try to smooth if we have both prev and next pose
+            if prev_pose_track_instance is not None and next_pose_track_instance is not None:
+                curr_joint_indexes = curr_pose_track_instance.keypoints.keys()
+                prev_joint_indexes = prev_pose_track_instance.keypoints.keys()
+                next_joint_indexes = next_pose_track_instance.keypoints.keys()
+
+                # we only try to smooth if we have curr, prev and next keypoints
+                joints_to_smooth = curr_joint_indexes & prev_joint_indexes & next_joint_indexes
+
+                for joint_index in joints_to_smooth:
+                    prev_keypoint = prev_pose_track_instance.keypoints[joint_index]
+                    next_keypoint = next_pose_track_instance.keypoints[joint_index]
+
+                    curr_keypoint = curr_pose_track_instance.keypoints[joint_index]
+                    curr_keypoint['x_pos'] = round(
+                        (curr_keypoint['x_pos'] + prev_keypoint['x_pos'] + next_keypoint['x_pos']) / 3.0)
+                    curr_keypoint['y_pos'] = round(
+                        (curr_keypoint['y_pos'] + prev_keypoint['y_pos'] + next_keypoint['y_pos']) / 3.0)
+
+
+def main():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        'model_file',
+        help='the model file to use for inference',
+    )
+    parser.add_argument(
+        'cfg',
+        help='the configuration for the model to use for inference',
+    )
+    parser.add_argument(
+        'video',
+        help='the input video',
+    )
+    parser.add_argument(
+        'poseout',
+        help='the pose estimation output HDF5 file',
+    )
+    # TODO we should change this to cm units rather than pixels
+    parser.add_argument(
+        '--max-inst-dist-px',
+        help='maximum keypoint separation distance in pixels. For a keypoint to '
+             'be added to an instance there must be at least one point in the '
+             'instance which is within this number of pixels.',
+        type=int,
+        default=150,
+    )
+    parser.add_argument(
+        '--max-embed-sep-within-instances',
+        help='maximum embedding separation allowed for a joint to be added to an existing '
+             'instance within the max distance separation',
+        type=float,
+        default=0.2,
+    )
+    parser.add_argument(
+        '--min-embed-sep-between-instances',
+        help='if two joints of the same type (e.g. both right ear) are within the max '
+             'distance separation and their embedding separation doesn\'t meet or '
+             'exceed this threshold, only the point with the highest heatmap value is kept.',
+        type=float,
+        default=0.1,
+    )
+    parser.add_argument(
+        '--min-pose-heatmap-val',
+        help='minimum heatmap value required for a local maximum to be kept as a keypoint detection',
+        type=float,
+        default=0.4,
+    )
+    parser.add_argument(
+        '--max-pose-dist-px',
+        help='maximum pixel distance allowed when linking a pose to the same track '
+             'in a neighboring frame',
+        type=float,
+        default=40,
+    )
+    parser.add_argument(
+        '--min-joint-count',
+        help='if a pose instance has fewer than this number of points it is discarded',
+        type=int,
+        default=6,
+    )
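+    # Roughly speaking, with the defaults above: two same-type keypoint peaks
+    # that are close together in space are kept as separate detections only
+    # when their embeddings differ by at least 0.1, and a detection joins an
+    # existing instance only when its embedding is within 0.2 of it.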
+    parser.add_argument(
+        '--max-instance-count',
+        help='a frame should not contain more than this number of poses. If it'
+             ' does, extra poses will be discarded in order of least confidence'
+             ' until we meet this threshold.',
+        type=int,
+    )
+    parser.add_argument(
+        '--pose-smoothing',
+        help='apply smoothing to the poses by averaging positions over three frames',
+        action='store_true',
+    )
+
+    args = parser.parse_args()
+
+    # shorten some args
+    max_embed_sep = args.max_embed_sep_within_instances
+    min_embed_sep = args.min_embed_sep_between_instances
+    max_inst_dist = args.max_inst_dist_px
+
+    cfg.defrost()
+    cfg.merge_from_file(args.cfg)
+    cfg.TEST.MODEL_FILE = args.model_file
+    cfg.freeze()
+
+    # start_time = time.time()
+
+    # cudnn related setting
+    cudnn.benchmark = cfg.CUDNN.BENCHMARK
+    torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC
+    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED
+
+    model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')(
+        cfg, is_train=False
+    )
+    print('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
+    model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False)
+    model.eval()
+    model = model.cuda()
+
+    model_extra = cfg.MODEL.EXTRA
+    use_neighboring_frames = False
+    if 'USE_NEIGHBORING_FRAMES' in model_extra:
+        use_neighboring_frames = model_extra['USE_NEIGHBORING_FRAMES']
+
+    with imageio.get_reader(args.video) as frame_reader:
+
+        # if args.resize_frames:
+        #     resize_height, resize_width = args.resize_frames
+        #     frame_reader = resize_frames(frame_reader, resize_height, resize_width)
+
+        pose_instances = list(infer_pose_instances(
+            model, frame_reader,
+            use_neighboring_frames,
+            min_embed_sep, max_embed_sep, max_inst_dist,
+            args.min_joint_count, args.max_instance_count, args.max_pose_dist_px,
+            args.min_pose_heatmap_val))
+        frame_count = len(pose_instances)
+
+        # remove points that jump too much since they are likely to be errors. A
+        # point is only considered a "jump" if its distance is too great from both
+        # the previous and next frame positions of the corresponding point.
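+        # For example, with the default --max-pose-dist-px of 40: a keypoint
+        # that is 60 px from its tracked position in both the previous and next
+        # frames is dropped, while one that is 60 px away in only one of the
+        # two neighboring frames is kept.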
+        for frame_index in range(frame_count):
+            curr_frame_pose_instances = pose_instances[frame_index]
+
+            prev_frame_pose_instances = []
+            if frame_index > 0:
+                prev_frame_pose_instances = pose_instances[frame_index - 1]
+
+            next_frame_pose_instances = []
+            if frame_index < frame_count - 1:
+                next_frame_pose_instances = pose_instances[frame_index + 1]
+
+            for curr_pose_track_instance in curr_frame_pose_instances:
+                prev_pose_track_instance = find_same_track_pose(
+                    curr_pose_track_instance,
+                    prev_frame_pose_instances)
+                next_pose_track_instance = find_same_track_pose(
+                    curr_pose_track_instance,
+                    next_frame_pose_instances)
+
+                if prev_pose_track_instance is not None or next_pose_track_instance is not None:
+                    for keypoint in list(curr_pose_track_instance.keypoints.values()):
+                        prev_next_dists = []
+
+                        try:
+                            prev_keypoint = prev_pose_track_instance.keypoints[keypoint['joint_index']]
+                            prev_next_dists.append(aeutil.xy_dist(prev_keypoint, keypoint))
+                        except (AttributeError, KeyError):
+                            # no matching pose (or keypoint) in the previous frame
+                            pass
+
+                        try:
+                            next_keypoint = next_pose_track_instance.keypoints[keypoint['joint_index']]
+                            prev_next_dists.append(aeutil.xy_dist(next_keypoint, keypoint))
+                        except (AttributeError, KeyError):
+                            # no matching pose (or keypoint) in the next frame
+                            pass
+
+                        # here is where we remove the point if it's too far from its neighbors
+                        if prev_next_dists:
+                            if all(dist > args.max_pose_dist_px for dist in prev_next_dists):
+                                del curr_pose_track_instance.keypoints[keypoint['joint_index']]
+
+        # get rid of poses that don't meet our point count threshold
+        for frame_index in range(frame_count):
+            curr_frame_pose_instances = pose_instances[frame_index]
+            pose_instances[frame_index] = [
+                p for p in curr_frame_pose_instances
+                if len(p.keypoints) >= args.min_joint_count
+            ]
+
+        # get rid of "isolated" poses (where there is no previous or next pose
+        # with the same track ID)
+        for frame_index in range(frame_count):
+            curr_frame_pose_instances = pose_instances[frame_index]
+
+            prev_frame_pose_instances = []
+            if frame_index > 0:
+                prev_frame_pose_instances = pose_instances[frame_index - 1]
+
+            next_frame_pose_instances = []
+            if frame_index < frame_count - 1:
+                next_frame_pose_instances = pose_instances[frame_index + 1]
+
+            pose_instances[frame_index] = [
+                p for p in curr_frame_pose_instances
+
+                # if not isolated
+                if
+                find_same_track_pose(p, prev_frame_pose_instances) is not None or
+                find_same_track_pose(p, next_frame_pose_instances) is not None
+            ]
+
+        # now that we've made all of these changes we should update the track ids
+        pose_instances = list(apply_track_id_to_poses(args.max_pose_dist_px, pose_instances))
+
+        if args.pose_smoothing:
+            smooth_poses(pose_instances)
+
+        max_instance_count = 0
+        for curr_pose_instances in pose_instances:
+
+            if len(curr_pose_instances) > max_instance_count:
+                max_instance_count = len(curr_pose_instances)
+
+            # print(
+            #     'pose_count:', len(curr_pose_instances),
+            #     'track_ids:', ' '.join([
+            #         str(p.instance_track_id)
+            #         for p
+            #         in sorted(curr_pose_instances, key=lambda pose: pose.instance_track_id)]))
+
+        # save data to an HDF5 file
+        points = np.zeros(
+            (frame_count, max_instance_count, 12, 2),
+            dtype=np.uint16)
+        confidence = np.zeros(
+            (frame_count, max_instance_count, 12),
+            dtype=np.float32)
+        instance_count = np.zeros(
+            frame_count,
+            dtype=np.uint8)
+        embed = np.zeros(
+            (frame_count, max_instance_count, 12),
+            dtype=np.float32)
+        instance_track_id = np.zeros(
+            (frame_count, max_instance_count),
+            dtype=np.uint32)
+
+        for frame_index, frame_pose_instances in enumerate(pose_instances):
+            instance_count[frame_index] = len(frame_pose_instances)
+            for pose_index, 
pose_instance in enumerate(frame_pose_instances): + instance_track_id[frame_index, pose_index] = pose_instance.instance_track_id + for keypoint in pose_instance.keypoints.values(): + points[frame_index, pose_index, keypoint['joint_index'], 0] = keypoint['y_pos'] + points[frame_index, pose_index, keypoint['joint_index'], 1] = keypoint['x_pos'] + confidence[frame_index, pose_index, keypoint['joint_index']] = keypoint['conf'] + embed[frame_index, pose_index, keypoint['joint_index']] = keypoint['embed'] + + with h5py.File(args.poseout, 'w') as h5file: + h5file['poseest/points'] = points + h5file['poseest/confidence'] = confidence + h5file['poseest/instance_count'] = instance_count + h5file['poseest/instance_embedding'] = embed + h5file['poseest/instance_track_id'] = instance_track_id + + h5file['poseest'].attrs['version'] = np.array([3, 0], dtype=np.uint16) + + +if __name__ == "__main__": + main() diff --git a/tools/inferobjects.py b/tools/inferobjects.py new file mode 100644 index 0000000..1fe7edd --- /dev/null +++ b/tools/inferobjects.py @@ -0,0 +1,267 @@ +import argparse +import h5py +import imageio +import numpy as np +import skimage.measure +import time + +import torch +import torch.nn.functional as torchfunc +import torch.backends.cudnn as cudnn +import torchvision.transforms as transforms + +import _init_paths +from config import cfg +from config import update_config + +import models + +import matplotlib.pyplot as plt + +FRAMES_PER_SECOND = 30 +FRAMES_PER_MINUTE = FRAMES_PER_SECOND * 60 + + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument( + '--model-file', + help='the model file to use for inference', + default=None, + ) + + parser.add_argument( + '--iou-threshold', + help='the minimum IOU threshold used for saying two object masks match', + type=float, + default=0.95, + ) + + parser.add_argument( + '--min-obj-size-px', + help='the minimum object size in pixels', + type=int, + default=400, + ) + + parser.add_argument( + '--minimum-arrangement-duration-secs', + help='the minimum duration in seconds before an object arrangement is considered valid', + type=float, + default=60, + ) + + parser.add_argument( + '--maximum-merge-duration-secs', + help='the maximum gap in seconds over which we will try to merge arrangements', + type=float, + default=0.5, + ) + + parser.add_argument( + 'cfg', + help='the configuration for the model to use for inference', + ) + + parser.add_argument( + 'video', + help='the input video', + ) + + parser.add_argument( + 'segout', + help='the segmentation output HDF5 file', + ) + + args = parser.parse_args() + cfg.defrost() + cfg.merge_from_file(args.cfg) + if args.model_file: + cfg.TEST.MODEL_FILE = args.model_file + cfg.freeze() + + max_merge_frames = round(args.maximum_merge_duration_secs * FRAMES_PER_SECOND) + min_duration_frames = round(args.minimum_arrangement_duration_secs * FRAMES_PER_SECOND) + + start_time = time.time() + + # cudnn related setting + cudnn.benchmark = cfg.CUDNN.BENCHMARK + torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC + torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED + + model = eval('models.' 
+ cfg.MODEL.NAME + '.get_pose_net')(
+        cfg, is_train=False
+    )
+    print('=> loading model from {}'.format(cfg.TEST.MODEL_FILE))
+    model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False)
+    model.eval()
+    model = model.cuda()
+
+    xform = transforms.Compose([
+        transforms.ToTensor(),
+        transforms.Normalize(
+            mean=[0.45, 0.45, 0.45],
+            std=[0.225, 0.225, 0.225],
+        ),
+    ])
+
+
+    def gen_segs():
+
+        with torch.no_grad(), imageio.get_reader(args.video) as reader:
+            batch = []
+            cuda_segs = None
+
+            def perform_inference():
+                nonlocal cuda_segs
+
+                if batch:
+                    batch_tensor = torch.stack([xform(img) for img in batch]).cuda()
+                    batch.clear()
+
+                    inf_out = model(batch_tensor)
+                    in_out_ratio = batch_tensor.size(-1) // inf_out.size(-1)
+                    if in_out_ratio == 4:
+                        # F.upsample is deprecated; interpolate is the equivalent call
+                        inf_out = torchfunc.interpolate(inf_out, scale_factor=4, mode='bicubic', align_corners=False)
+
+                    # positive logits correspond to "object" pixels (the model is
+                    # trained with BCEWithLogitsLoss, so logit 0.0 is probability 0.5)
+                    cuda_segs = inf_out >= 0.0
+                    cuda_segs = cuda_segs.squeeze(1)
+
+            for image in reader:
+
+                batch.append(image)
+                if len(batch) == cfg.TEST.BATCH_SIZE_PER_GPU:
+                    if cuda_segs is not None:
+                        cpu_segs = cuda_segs.cpu()
+                        cuda_segs = None
+                        perform_inference()
+
+                        for i in range(cpu_segs.size(0)):
+                            yield cpu_segs[i, ...].numpy()
+                    else:
+                        perform_inference()
+
+            # drain: flush results for the final (possibly partial) batches
+            if cuda_segs is not None:
+                cpu_segs = cuda_segs.cpu()
+                cuda_segs = None
+
+                for i in range(cpu_segs.size(0)):
+                    yield cpu_segs[i, ...].numpy()
+
+            perform_inference()
+            if cuda_segs is not None:
+                cpu_segs = cuda_segs.cpu()
+                cuda_segs = None
+
+                for i in range(cpu_segs.size(0)):
+                    yield cpu_segs[i, ...].numpy()
+
+    def gen_accum_masks():
+        # accum_mask holds a per-pixel count of "object" votes; dividing by the
+        # frame span and thresholding at 0.5 gives a majority-vote mask
+        start_frame = 0
+        accum_mask = None
+        for frame_index, seg_mask in enumerate(gen_segs()):
+            seg_mask = seg_mask.astype(bool)
+
+            if accum_mask is None:
+                start_frame = frame_index
+                accum_mask = np.array(seg_mask, dtype=np.uint32)
+            else:
+                avg_mask = accum_mask / (frame_index - start_frame) >= 0.5
+                sum_avg_mask = avg_mask.sum()
+                if sum_avg_mask < args.min_obj_size_px:
+                    start_frame = frame_index
+                    accum_mask = np.array(seg_mask, dtype=np.uint32)
+                else:
+                    # perform IOU for the running average object mask vs current seg_mask
+                    intersection = np.sum(avg_mask & seg_mask)
+                    union = np.sum(avg_mask | seg_mask)
+                    curr_iou = intersection / union
+
+                    if curr_iou < args.iou_threshold:
+                        # we've passed the threshold of a new object configuration
+                        if 1 + frame_index - start_frame >= min_duration_frames:
+                            yield accum_mask, start_frame, frame_index
+
+                        start_frame = frame_index
+                        accum_mask = np.array(seg_mask, dtype=np.uint32)
+                    else:
+                        accum_mask += seg_mask
+
+            if frame_index != 0 and frame_index % FRAMES_PER_MINUTE == 0:
+                curr_time = time.time()
+                cum_time_elapsed = curr_time - start_time
+                print('processed {:.1f} min of video in {:.1f} min'.format(
+                    frame_index / FRAMES_PER_MINUTE,
+                    cum_time_elapsed / 60,
+                ))
+
+        if accum_mask is not None:
+            frame_index += 1
+
+            avg_mask = accum_mask / (frame_index - start_frame) >= 0.5
+            sum_avg_mask = avg_mask.sum()
+            if (sum_avg_mask >= args.min_obj_size_px
+                    and 1 + frame_index - start_frame >= min_duration_frames):
+                yield accum_mask, start_frame, frame_index
+
+    def merge_accum_masks(accum_masks):
+        accum_mask = None
+        avg_mask = None
+        start_frame = 0
+        end_frame = 0
+
+        for next_accum_mask, next_start_frame, next_end_frame in accum_masks:
+            next_avg_mask = next_accum_mask / (next_end_frame - next_start_frame) >= 0.5
+
+            merge_happened = False
+            if accum_mask is not None:
+                frame_gap = next_start_frame - end_frame
+                if frame_gap <= max_merge_frames:
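+                    # e.g. with the default --iou-threshold of 0.95, two masks
+                    # that overlap on 480 px out of a 520 px union (IOU ~0.92)
+                    # are NOT merged; only near-identical arrangements separated
+                    # by a short gap get joined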
+                    # perform IOU for the masks
+                    intersection = np.sum(avg_mask & next_avg_mask)
+                    union = np.sum(avg_mask | next_avg_mask)
+                    curr_iou = intersection / union
+                    if curr_iou >= args.iou_threshold:
+                        # we can perform the merge
+                        accum_mask += next_accum_mask
+                        end_frame = next_end_frame
+                        merge_happened = True
+
+                if not merge_happened:
+                    yield accum_mask, start_frame, end_frame
+
+            if not merge_happened:
+                accum_mask = next_accum_mask
+                avg_mask = next_avg_mask
+                start_frame = next_start_frame
+                end_frame = next_end_frame
+
+        if accum_mask is not None:
+            yield accum_mask, start_frame, end_frame
+
+    with h5py.File(args.segout, 'w') as segout_h5:
+        seg_group_index = 0
+        accum_masks = gen_accum_masks()
+        if max_merge_frames > 0:
+            accum_masks = merge_accum_masks(accum_masks)
+
+        for accum_mask, start_frame, end_frame in accum_masks:
+            print('start frame:', start_frame, 'end frame:', end_frame)
+            avg_mask = accum_mask / (end_frame - start_frame) >= 0.5
+
+            labels, label_count = skimage.measure.label(avg_mask, return_num=True)
+            obj_mask_gen = (labels == i for i in range(1, label_count + 1))
+            obj_masks = [mask.astype(np.uint8) for mask in obj_mask_gen if mask.sum() >= args.min_obj_size_px]
+
+            if obj_masks:
+                mask_ds_path = 'objectsegs/arrangement_' + str(seg_group_index)
+                segout_h5[mask_ds_path] = np.stack(obj_masks)
+                segout_h5[mask_ds_path].attrs['start_frame'] = start_frame
+                segout_h5[mask_ds_path].attrs['end_frame'] = end_frame
+                seg_group_index += 1
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/listcvatnetids.py b/tools/listcvatnetids.py
new file mode 100644
index 0000000..8778c11
--- /dev/null
+++ b/tools/listcvatnetids.py
@@ -0,0 +1,48 @@
+import argparse
+import re
+
+import csv
+import functools
+import itertools
+import os
+import pprint
+import random
+import shutil
+
+import _init_paths
+from dataset.multimousepose import parse_poses
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='list all of the video network IDs used for the CVAT annotations',
+    )
+
+    parser.add_argument('--cvat-files',
+                        help='list of CVAT XML files',
+                        nargs='+',
+                        required=True,
+                        type=str)
+
+    args = parser.parse_args()
+
+    return args
+
+
+def main():
+    args = parse_args()
+
+    # Data loading code
+    img_pat = re.compile(r'(.+\.avi)_([0-9]+)\.png')
+    pose_labels = list(itertools.chain.from_iterable(parse_poses(f) for f in args.cvat_files))
+    for pose_label in pose_labels:
+        img_name = pose_label['image_name']
+
+        m = img_pat.match(img_name)
+        assert m
+
+        print(m.group(1).replace('+', '/'))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/tools/mousetrain.py b/tools/mousetrain.py
new file mode 100755
index 0000000..9787fad
--- /dev/null
+++ b/tools/mousetrain.py
@@ -0,0 +1,259 @@
+# ------------------------------------------------------------------------------
+# Copyright (c) Microsoft
+# Licensed under the MIT License.
+# Written by Bin Xiao (Bin.Xiao@microsoft.com) +# ------------------------------------------------------------------------------ + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import csv +import os +import pprint +import shutil + +import torch +import torch.nn.parallel +import torch.backends.cudnn as cudnn +import torch.optim +import torch.utils.data +import torch.utils.data.distributed +import torchvision.transforms as transforms +from tensorboardX import SummaryWriter + +import _init_paths +from config import cfg +from config import update_config +from core.loss import JointsMSELoss +from core.focalloss import bce_focal_loss +from core.function import train +from core.function import validate +from utils.utils import get_optimizer +from utils.utils import save_checkpoint +from utils.utils import create_logger +from utils.utils import get_model_summary + +import dataset +import models + + +def parse_args(): + parser = argparse.ArgumentParser(description='Train keypoints network') + # general + parser.add_argument('--cfg', + help='experiment configure file name', + required=True, + type=str) + + parser.add_argument('opts', + help="Modify config options using the command-line", + default=None, + nargs=argparse.REMAINDER) + + # philly + parser.add_argument('--modelDir', + help='model directory', + type=str, + default='') + parser.add_argument('--logDir', + help='log directory', + type=str, + default='') + parser.add_argument('--dataDir', + help='data directory', + type=str, + default='') + parser.add_argument('--prevModelDir', + help='prev Model directory', + type=str, + default='') + + args = parser.parse_args() + + return args + + +def main(): + args = parse_args() + update_config(cfg, args) + + logger, final_output_dir, tb_log_dir = create_logger( + cfg, args.cfg, 'train') + + logger.info(pprint.pformat(args)) + logger.info(cfg) + + # cudnn related setting + cudnn.benchmark = cfg.CUDNN.BENCHMARK + torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC + torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED + + model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')( + cfg, is_train=True + ) + + # copy model file + this_dir = os.path.dirname(__file__) + shutil.copy2( + os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'), + final_output_dir) + # logger.info(pprint.pformat(model)) + + # writer_dict = { + # 'writer': SummaryWriter(log_dir=tb_log_dir), + # 'train_global_steps': 0, + # 'valid_global_steps': 0, + # } + + # THIS FUNCTIONALITY IS BROKEN UNTIL + # https://github.com/pytorch/pytorch/issues/19374 IS FIXED + # dump_input = torch.rand( + # (1, 3, cfg.MODEL.IMAGE_SIZE[1], cfg.MODEL.IMAGE_SIZE[0]) + # ) + # writer_dict['writer'].add_graph(model, (dump_input, )) + # + # logger.info(get_model_summary(model, dump_input)) + + model = torch.nn.DataParallel(model, device_ids=cfg.GPUS).cuda() + + # define loss function (criterion) and optimizer + if cfg.LOSS.USE_FOCAL_LOSS: + print('USING FOCAL LOSS') + def fl_crit(input, target, target_weight): + return bce_focal_loss(input, target) + criterion = fl_crit + else: + print('USING MSE LOSS') + criterion = JointsMSELoss( + use_target_weight=cfg.LOSS.USE_TARGET_WEIGHT + ).cuda() + + # Data loading code + # normalize = transforms.Normalize( + # mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] + # ) + normalize = transforms.Normalize( + mean=[0.45], std=[0.225] + ) + train_dataset = eval('dataset.'+cfg.DATASET.DATASET)( + cfg, cfg.DATASET.ROOT, 
cfg.DATASET.TRAIN_SET, True, + transforms.Compose([ + transforms.ToTensor(), + normalize, + ]) + ) + print('len(train_dataset):', len(train_dataset)) + valid_dataset = eval('dataset.'+cfg.DATASET.DATASET)( + cfg, cfg.DATASET.ROOT, cfg.DATASET.TEST_SET, False, + transforms.Compose([ + transforms.ToTensor(), + normalize, + ]) + ) + print('len(valid_dataset):', len(valid_dataset)) + + print('init train loader') + train_loader = torch.utils.data.DataLoader( + train_dataset, + batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU*len(cfg.GPUS), + shuffle=cfg.TRAIN.SHUFFLE, + num_workers=cfg.WORKERS, + pin_memory=cfg.PIN_MEMORY, + ) + print('init valid loader') + valid_loader = torch.utils.data.DataLoader( + valid_dataset, + batch_size=cfg.TEST.BATCH_SIZE_PER_GPU*len(cfg.GPUS), + shuffle=False, + num_workers=cfg.WORKERS, + pin_memory=cfg.PIN_MEMORY, + ) + + best_perf = None + best_model = False + last_epoch = -1 + optimizer = get_optimizer(cfg, model) + begin_epoch = cfg.TRAIN.BEGIN_EPOCH + checkpoint_file = os.path.join( + final_output_dir, 'checkpoint.pth' + ) + + if cfg.AUTO_RESUME and os.path.exists(checkpoint_file): + logger.info("=> loading checkpoint '{}'".format(checkpoint_file)) + checkpoint = torch.load(checkpoint_file) + begin_epoch = checkpoint['epoch'] + best_perf = checkpoint['perf'] + last_epoch = checkpoint['epoch'] + model.load_state_dict(checkpoint['state_dict']) + + optimizer.load_state_dict(checkpoint['optimizer']) + logger.info("=> loaded checkpoint '{}' (epoch {})".format( + checkpoint_file, checkpoint['epoch'])) + + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR( + optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR, + last_epoch=last_epoch + ) + + train_table_fname = os.path.join(final_output_dir, 'training.tsv') + val_table_fname = os.path.join(final_output_dir, 'validation.tsv') + with open(train_table_fname, 'w', newline='') as train_table_f, \ + open(val_table_fname, 'w', newline='') as val_table_f: + + train_header = ['Epoch', 'Batch', 'Loss', 'Accuracy', 'Batch Time', 'Batch Size'] + train_table_writer = csv.DictWriter(train_table_f, fieldnames=train_header, delimiter='\t') + train_table_writer.writeheader() + + val_header = ['Epoch', 'Loss', 'Accuracy', 'Performance Indicator'] + val_table_writer = csv.DictWriter(val_table_f, fieldnames=val_header, delimiter='\t') + val_table_writer.writeheader() + + print('entering epoch loop from:', begin_epoch, 'to', cfg.TRAIN.END_EPOCH) + for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH): + lr_scheduler.step() + + # train for one epoch + train( + cfg, train_loader, model, criterion, optimizer, epoch, + final_output_dir, tb_log_dir, writer_dict=None, + dict_writer=train_table_writer) + + + # evaluate on validation set + perf_indicator = validate( + cfg, valid_loader, valid_dataset, model, criterion, + final_output_dir, tb_log_dir, writer_dict=None, + dict_writer=val_table_writer, epoch=epoch, + ) + + if best_perf is None or perf_indicator >= best_perf: + best_perf = perf_indicator + best_model = True + print('*** NEW BEST ***', perf_indicator) + else: + best_model = False + + logger.info('=> saving checkpoint to {}'.format(final_output_dir)) + save_checkpoint({ + 'epoch': epoch + 1, + 'model': cfg.MODEL.NAME, + 'state_dict': model.state_dict(), + 'best_state_dict': model.module.state_dict(), + 'perf': perf_indicator, + 'optimizer': optimizer.state_dict(), + }, best_model, final_output_dir) + + final_model_state_file = os.path.join( + final_output_dir, 'final_state.pth' + ) + logger.info('=> saving final model state to {}'.format( + 
final_model_state_file) + ) + torch.save(model.module.state_dict(), final_model_state_file) + # writer_dict['writer'].close() + + +if __name__ == '__main__': + main() diff --git a/tools/ofobjecttrain.py b/tools/ofobjecttrain.py new file mode 100644 index 0000000..36736df --- /dev/null +++ b/tools/ofobjecttrain.py @@ -0,0 +1,194 @@ +# base this off of mousetrain.py... but we need to use BCEWithLogitsLoss +# https://pytorch.org/docs/stable/nn.html#bcewithlogitsloss + +import argparse +import csv +import os +import pprint +import random +import shutil + +import torch +import torch.backends.cudnn as cudnn +import torchvision.transforms as transforms + +import _init_paths +from config import cfg +from config import update_config +from core.segfunction import train, validate +from dataset.OpenFieldObjDataset import OpenFieldObjDataset, parse_obj_labels +from utils.utils import get_optimizer +from utils.utils import save_checkpoint +from utils.utils import create_logger + +import models + +def parse_args(): + parser = argparse.ArgumentParser(description='Train keypoints network') + # general + parser.add_argument('--cfg', + help='experiment configure file name', + required=True, + type=str) + + parser.add_argument('opts', + help="Modify config options using the command-line", + default=None, + nargs=argparse.REMAINDER) + + # philly + parser.add_argument('--modelDir', + help='model directory', + type=str, + default='') + parser.add_argument('--logDir', + help='log directory', + type=str, + default='') + parser.add_argument('--dataDir', + help='data directory', + type=str, + default='') + parser.add_argument('--prevModelDir', + help='prev Model directory', + type=str, + default='') + + args = parser.parse_args() + + return args + + +def main(): + args = parse_args() + update_config(cfg, args) + + logger, final_output_dir, tb_log_dir = create_logger( + cfg, args.cfg, 'train') + + logger.info(pprint.pformat(args)) + logger.info(cfg) + + # cudnn related setting + cudnn.benchmark = cfg.CUDNN.BENCHMARK + torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC + torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED + + model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(cfg, is_train=True).cuda() + + # copy model file + this_dir = os.path.dirname(__file__) + shutil.copy2( + os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'), + final_output_dir) + + criterion = torch.nn.BCEWithLogitsLoss().cuda() + + # Data loading code + obj_labels = list(parse_obj_labels(cfg.DATASET.CVAT_XML)) + validation_set_filename = cfg.DATASET.TEST_SET + val_img_names = set() + if os.path.exists(validation_set_filename): + with open(validation_set_filename) as val_file: + for curr_line in val_file: + img_name = curr_line.strip() + val_img_names.add(img_name) + + else: + img_names = {lbl['image_name'] for lbl in obj_labels} + val_count = round(len(img_names) * cfg.DATASET.TEST_SET_PROPORTION) + val_img_names = set(random.sample(img_names, val_count)) + + logger.info("=> saving validation image names to '{}'".format(validation_set_filename)) + with open(validation_set_filename, 'w') as val_file: + for img_name in val_img_names: + val_file.write(img_name) + val_file.write('\n') + + train_labels = [lbl for lbl in obj_labels if lbl['image_name'] not in val_img_names] + train_ofods = OpenFieldObjDataset( + cfg, + train_labels, + True, + transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize(mean=[0.485], std=[0.229]), + ]), + ) + train_loader = torch.utils.data.DataLoader( + train_ofods, + 
batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU, + shuffle=cfg.TRAIN.SHUFFLE, + num_workers=cfg.WORKERS, + pin_memory=cfg.PIN_MEMORY, + ) + + val_labels = [lbl for lbl in obj_labels if lbl['image_name'] in val_img_names] + val_ofods = OpenFieldObjDataset( + cfg, + val_labels, + False, + transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize(mean=[0.485], std=[0.229]), + ]), + ) + valid_loader = torch.utils.data.DataLoader( + val_ofods, + batch_size=cfg.TEST.BATCH_SIZE_PER_GPU, + shuffle=False, + num_workers=cfg.WORKERS, + pin_memory=cfg.PIN_MEMORY, + ) + + logger.info("=> full data set size: {}; training/validation: {} [{}]/{} [{}]".format( + len(obj_labels), len(train_labels), len(train_ofods), len(val_labels), len(val_ofods))) + + best_perf = None + last_epoch = -1 + optimizer = get_optimizer(cfg, model) + begin_epoch = cfg.TRAIN.BEGIN_EPOCH + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR( + optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR, + last_epoch=last_epoch + ) + + train_table_fname = os.path.join(final_output_dir, 'training.tsv') + val_table_fname = os.path.join(final_output_dir, 'validation.tsv') + with open(train_table_fname, 'w', newline='') as train_table_f, \ + open(val_table_fname, 'w', newline='') as val_table_f: + + train_header = ['Epoch', 'Batch', 'Loss', 'Batch Time', 'Batch Size'] + train_table_writer = csv.DictWriter(train_table_f, fieldnames=train_header, delimiter='\t') + train_table_writer.writeheader() + + val_header = ['Epoch', 'Loss', 'Accuracy'] + val_table_writer = csv.DictWriter(val_table_f, fieldnames=val_header, delimiter='\t') + val_table_writer.writeheader() + + logger.info('entering epoch loop from: {} to {}'.format(begin_epoch, cfg.TRAIN.END_EPOCH)) + for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH): + lr_scheduler.step() + + # train for one epoch + train(cfg, train_loader, model, criterion, optimizer, epoch, train_table_writer) + + # evaluate on validation set + perf_indicator = validate( + cfg, valid_loader, model, + criterion, val_table_writer, epoch) + + if best_perf is None or perf_indicator >= best_perf: + best_perf = perf_indicator + logger.info('*** NEW BEST *** {}'.format(perf_indicator)) + best_model_state_file = os.path.join(final_output_dir, 'best_state.pth') + logger.info('=> saving best model state to {}'.format(best_model_state_file)) + torch.save(model.state_dict(), best_model_state_file) + + final_model_state_file = os.path.join(final_output_dir, 'final_state.pth') + logger.info('=> saving final model state to {}'.format(final_model_state_file)) + torch.save(model.state_dict(), final_model_state_file) + + +if __name__ == '__main__': + main() diff --git a/tools/rendercvat.py b/tools/rendercvat.py new file mode 100644 index 0000000..b2ad214 --- /dev/null +++ b/tools/rendercvat.py @@ -0,0 +1,86 @@ +import argparse +import cv2 +import numpy as np +import os +import skimage +import xml.etree.ElementTree as ET + + +def parse_cvat(cvat_xml_path): + root = ET.parse(cvat_xml_path) + for image_elem in root.findall('./image'): + img_name = image_elem.attrib['name'] + + polylines = [] + for polyline_elem in image_elem.findall('./polyline'): + xy_strs = [ + xy_str.split(',') + for xy_str in polyline_elem.attrib['points'].split(';') + ] + assert len(xy_strs) + + xy_points = np.array( + [(float(x_str), float(y_str)) for x_str, y_str in xy_strs], + dtype=np.float32, + ) + + polylines.append(xy_points) + + yield { + 'image_name': img_name, + 'polylines': polylines, + } + + +def render_polyline_overlay(image, polyline, color=(255, 255, 
255)):
+
+    polyline_rounded = np.rint(polyline).astype(int)
+
+    # first the outline in black (cv2 drawing functions require integer
+    # pixel coordinates)
+    cv2.polylines(image, [polyline_rounded], False, (0, 0, 0), 2, cv2.LINE_AA)
+    for point_x, point_y in polyline_rounded:
+        cv2.circle(image, (int(point_x), int(point_y)), 3, (0, 0, 0), -1, cv2.LINE_AA)
+
+    # then inner trace with color
+    cv2.polylines(image, [polyline_rounded], False, color, 1, cv2.LINE_AA)
+    for point_x, point_y in polyline_rounded:
+        cv2.circle(image, (int(point_x), int(point_y)), 2, color, -1, cv2.LINE_AA)
+
+
+def main():
+    parser = argparse.ArgumentParser(description='render cvat annotations')
+
+    parser.add_argument('--cvat-files',
+                        help='list of CVAT XML files to use',
+                        nargs='+',
+                        required=True,
+                        type=str)
+    parser.add_argument('--image-dir',
+                        help='directory containing images',
+                        required=True,
+                        type=str)
+    parser.add_argument('--image-out-dir',
+                        help='the directory we render to',
+                        required=True,
+                        type=str)
+
+    args = parser.parse_args()
+
+    if args.image_out_dir is not None:
+        os.makedirs(args.image_out_dir, exist_ok=True)
+
+    for cvat_file in args.cvat_files:
+        for image_labels in parse_cvat(cvat_file):
+            image_path = os.path.join(args.image_dir, image_labels['image_name'])
+            image_out_path = os.path.join(args.image_out_dir, image_labels['image_name'])
+            if os.path.exists(image_path):
+                # image_data_numpy = skimage.io.imread(image_path, as_gray=True)
+                image_data_numpy = skimage.io.imread(image_path)
+                for polyline in image_labels['polylines']:
+                    render_polyline_overlay(image_data_numpy, polyline)
+
+                skimage.io.imsave(image_out_path, image_data_numpy)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/rendervidoverlay.py b/tools/rendervidoverlay.py
new file mode 100644
index 0000000..0830fe2
--- /dev/null
+++ b/tools/rendervidoverlay.py
@@ -0,0 +1,360 @@
+import argparse
+import cv2
+import imageio
+import multiprocessing as mp
+import numpy as np
+import os
+import h5py
+
+
+NOSE_INDEX = 0
+LEFT_EAR_INDEX = 1
+RIGHT_EAR_INDEX = 2
+BASE_NECK_INDEX = 3
+LEFT_FRONT_PAW_INDEX = 4
+RIGHT_FRONT_PAW_INDEX = 5
+CENTER_SPINE_INDEX = 6
+LEFT_REAR_PAW_INDEX = 7
+RIGHT_REAR_PAW_INDEX = 8
+BASE_TAIL_INDEX = 9
+MID_TAIL_INDEX = 10
+TIP_TAIL_INDEX = 11
+
+
+CONNECTED_SEGMENTS = [
+    [LEFT_FRONT_PAW_INDEX, CENTER_SPINE_INDEX, RIGHT_FRONT_PAW_INDEX],
+    [LEFT_REAR_PAW_INDEX, BASE_TAIL_INDEX, RIGHT_REAR_PAW_INDEX],
+    [
+        NOSE_INDEX, BASE_NECK_INDEX, CENTER_SPINE_INDEX,
+        BASE_TAIL_INDEX, MID_TAIL_INDEX, TIP_TAIL_INDEX,
+    ],
+]
+
+# from: http://colorbrewer2.org/?type=qualitative&scheme=Set3&n=8
+# COLOR_PALETTE = [
+#     (141,211,199),
+#     (255,255,179),
+#     (190,186,218),
+#     (251,128,114),
+#     (128,177,211),
+#     (253,180,98),
+#     (179,222,105),
+#     (252,205,229),
+# ]
+
+COLOR_PALETTE = [
+    (166,206,227),
+    (31,120,180),
+    (178,223,138),
+    (51,160,44),
+    (251,154,153),
+    (227,26,28),
+    (253,191,111),
+    (255,127,0),
+    (202,178,214),
+    (106,61,154),
+    (255,255,153)]
+
+def render_pose_overlay(image, frame_points, exclude_points, color=(255, 255, 255)):
+
+    # we need to fragment lines if exclude_points breaks up
+    # (or removes completely) line segments
+    def gen_line_fragments():
+        curr_fragment = []
+        for curr_pt_indexes in CONNECTED_SEGMENTS:
+            for curr_pt_index in curr_pt_indexes:
+                if curr_pt_index in exclude_points:
+                    if len(curr_fragment) >= 2:
+                        yield curr_fragment
+                    curr_fragment = []
+                else:
+                    curr_fragment.append(curr_pt_index)
+
+            if len(curr_fragment) >= 2:
+                yield curr_fragment
+            curr_fragment = []
+
+    line_pt_indexes = list(gen_line_fragments())
+
+    for curr_line_indexes in line_pt_indexes:
+        line_pts 
= np.array( + [(pt_x, pt_y) for pt_y, pt_x in frame_points[curr_line_indexes]], + np.int32) + cv2.polylines(image, [line_pts], False, (0, 0, 0), 2, cv2.LINE_AA) + + for point_index in range(12): + if point_index in exclude_points: + continue + + point_y, point_x = frame_points[point_index, :] + + cv2.circle(image, (point_x, point_y), 3, (0, 0, 0), -1, cv2.LINE_AA) + + for curr_line_indexes in line_pt_indexes: + line_pts = np.array( + [(pt_x, pt_y) for pt_y, pt_x in frame_points[curr_line_indexes]], + np.int32) + cv2.polylines(image, [line_pts], False, color, 1, cv2.LINE_AA) + + for point_index in range(12): + if point_index in exclude_points: + continue + + point_y, point_x = frame_points[point_index, :] + + cv2.circle(image, (point_x, point_y), 2, color, -1, cv2.LINE_AA) + + +def render_pose_v3_overlay( + image, + frame_points, + frame_confidence, + frame_track_ids, + exclude_points): + + instance_count = frame_points.shape[0] + + id_color_dict = dict() + avail_color_idxs = set(range(len(COLOR_PALETTE))) + sorted_ids = sorted(frame_track_ids) + + if len(frame_track_ids) <= len(COLOR_PALETTE): + for curr_id in sorted_ids: + curr_color_idx = curr_id % len(COLOR_PALETTE) + offset = 0 + while curr_color_idx not in avail_color_idxs: + offset += 1 + curr_color_idx = (curr_id + offset) % len(COLOR_PALETTE) + + id_color_dict[curr_id] = COLOR_PALETTE[curr_color_idx] + avail_color_idxs.remove(curr_color_idx) + else: + id_color_dict = {i: (255, 255, 255) for i in sorted_ids} + + for instance_index in range(instance_count): + + # for this instance we add zero confidence points to the + # set of excluded point indexes + inst_confidence = frame_confidence[instance_index, :] + zero_conf_indexes = set((inst_confidence == 0).nonzero()[0]) + inst_exclude_points = exclude_points | zero_conf_indexes + + render_pose_overlay( + image, + frame_points[instance_index, ...], + inst_exclude_points, + id_color_dict[frame_track_ids[instance_index]]) + + +def process_video(in_video_path, pose_h5_path, out_video_path, exclude_points): + if not os.path.isfile(in_video_path): + print('ERROR: missing file: ' + in_video_path, flush=True) + return + + if not os.path.isfile(pose_h5_path): + print('ERROR: missing file: ' + pose_h5_path, flush=True) + return + + with imageio.get_reader(in_video_path) as video_reader, \ + h5py.File(pose_h5_path, 'r') as pose_h5, \ + imageio.get_writer(out_video_path, fps=30) as video_writer: + + vid_grp = next(iter(pose_h5.values())) + major_version = 2 + if 'version' in vid_grp.attrs: + major_version = vid_grp.attrs['version'][0] + + if major_version == 2: + all_points = vid_grp['points'][:] + for frame_index, image in enumerate(video_reader): + + render_pose_overlay( + image, + all_points[frame_index, ...], + exclude_points) + + video_writer.append_data(image) + + elif major_version == 3: + all_points = vid_grp['points'][:] + all_confidence = vid_grp['confidence'][:] + all_instance_count = vid_grp['instance_count'][:] + all_track_id = vid_grp['instance_track_id'][:] + for frame_index, image in enumerate(video_reader): + + frame_instance_count = all_instance_count[frame_index] + if frame_instance_count > 0: + render_pose_v3_overlay( + image, + all_points[frame_index, :frame_instance_count, ...], + all_confidence[frame_index, :frame_instance_count, ...], + all_track_id[frame_index, :frame_instance_count], + exclude_points) + + video_writer.append_data(image) + + else: + print('ERROR: unknown version for file format:', vid_grp.attrs['version']) + + print('finished generating video:', 
out_video_path, flush=True)
+
+
+def process_video_relpath(video_relpath, pose_suffix, in_dir, out_dir, exclude_points):
+
+    pose_suffix_noext, _ = os.path.splitext(pose_suffix)
+    if len(pose_suffix_noext) == 0:
+        print('ERROR: bad pose suffix: ' + pose_suffix, flush=True)
+        return
+
+    # calculate full file paths from the in/out dirs and relative path
+    relpath_noext, _ = os.path.splitext(video_relpath)
+    in_video_path = os.path.join(in_dir, video_relpath)
+    pose_h5_path = os.path.join(in_dir, relpath_noext + pose_suffix)
+    out_video_path = os.path.join(out_dir, relpath_noext + pose_suffix_noext + '.avi')
+
+    # we may need to create the output dir
+    if out_dir != in_dir:
+        full_out_dir = os.path.dirname(out_video_path)
+        os.makedirs(full_out_dir, exist_ok=True)
+
+    process_video(in_video_path, pose_h5_path, out_video_path, exclude_points)
+
+
+# Examples:
+# python -u tools/rendervidoverlay.py \
+#     --exclude-forepaws --exclude-ears \
+#     dir --in-dir ~/smb/labshare \
+#     --pose-suffix '_pose_est_v3.h5' --num-procs 3 \
+#     --out-dir ~/smb/labshare/kumarlab-new/Keith/BXD-pose-overlay-2020-08-14 \
+#     --batch-file data/BXD-batch-50-subset.txt
+
+def main():
+
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        '--exclude-forepaws',
+        action='store_true',
+        dest='exclude_forepaws',
+        default=False,
+        help='should we exclude the forepaws',
+    )
+    parser.add_argument(
+        '--exclude-ears',
+        action='store_true',
+        dest='exclude_ears',
+        default=False,
+        help='should we exclude the ears',
+    )
+
+    subparsers = parser.add_subparsers()
+
+    dir_parser = subparsers.add_parser(
+        'dir',
+        help='dir subcommand help (for processing a directory of videos)')
+    dir_parser.set_defaults(subcommand='dir')
+
+    dir_parser.add_argument(
+        '--in-dir',
+        help='input directory of videos to process',
+        required=True,
+    )
+    dir_parser.add_argument(
+        '--out-dir',
+        help='out directory to save videos to (defaults to the same as --in-dir)',
+        required=False,
+    )
+    dir_parser.add_argument(
+        '--pose-suffix',
+        help='the suffix used for pose estimation files (appended to'
+             ' video file after removing extension)',
+        nargs='+',
+        required=True,
+    )
+    dir_parser.add_argument(
+        '--num-procs',
+        help='the number of processes to use',
+        default=2,
+        type=int,
+    )
+    dir_parser.add_argument(
+        '--batch-file',
+        help='a newline separated list of video files to process. Paths'
+             ' should be relative to the given --in-dir. The default'
+             ' behavior if this argument is missing is to traverse the'
+             ' --in-dir and process all AVI files.',
+        required=False,
+    )
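+    # (a batch file here is plain text with one video path per line, relative
+    # to --in-dir; e.g. a hypothetical entry: NV1-B2B/2019-10-22/video1.avi)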
+    vid_parser = subparsers.add_parser(
+        'vid',
+        help='vid subcommand help (for processing a single video)')
+    vid_parser.set_defaults(subcommand='vid')
+
+    vid_parser.add_argument(
+        '--in-vid',
+        help='input video to process',
+        required=True,
+    )
+    vid_parser.add_argument(
+        '--in-pose',
+        help='input HDF5 pose file',
+        required=True,
+    )
+    vid_parser.add_argument(
+        '--out-vid',
+        help='output pose overlay video to generate',
+        required=True,
+    )
+
+    args = parser.parse_args()
+
+    exclude_points = set()
+    if args.exclude_forepaws:
+        exclude_points.add(LEFT_FRONT_PAW_INDEX)
+        exclude_points.add(RIGHT_FRONT_PAW_INDEX)
+    if args.exclude_ears:
+        exclude_points.add(LEFT_EAR_INDEX)
+        exclude_points.add(RIGHT_EAR_INDEX)
+
+    if 'subcommand' in args:
+        if args.subcommand == 'dir':
+
+            out_dir = args.in_dir
+            if args.out_dir is not None:
+                out_dir = args.out_dir
+
+            files_to_process = []
+            if args.batch_file is not None:
+                with open(args.batch_file) as f:
+                    for line in f:
+                        files_to_process.append(line.strip())
+
+            else:
+                for dirname, _, filelist in os.walk(args.in_dir):
+                    for fname in filelist:
+                        if fname.lower().endswith('.avi'):
+                            fpath = os.path.join(dirname, fname)
+                            rel_fpath = os.path.relpath(fpath, args.in_dir)
+                            files_to_process.append(rel_fpath)
+
+            with mp.Pool(args.num_procs) as p:
+                for rel_fpath in files_to_process:
+                    for pose_suffix in args.pose_suffix:
+                        p.apply_async(
+                            process_video_relpath,
+                            (rel_fpath, pose_suffix, args.in_dir, out_dir, exclude_points),
+                            dict(),
+                            lambda x: None,
+                            lambda x: print(x))
+
+                p.close()
+                p.join()
+
+        elif args.subcommand == 'vid':
+            process_video(args.in_vid, args.in_pose, args.out_vid, exclude_points)
+
+    else:
+        print('ERROR: dir or vid subcommand must be specified')
+
+if __name__ == '__main__':
+    main()
diff --git a/tools/sampleframes.py b/tools/sampleframes.py
new file mode 100644
index 0000000..32f656c
--- /dev/null
+++ b/tools/sampleframes.py
@@ -0,0 +1,201 @@
+import argparse
+import cv2
+import imageio
+import itertools
+import math
+import numpy as np
+import os
+
+
+# Example:
+#
+# share_root='/run/user/1002/gvfs/smb-share:server=bht2stor.jax.org,share=vkumar'
+# python tools/sampleframes.py \
+#     --videos "${share_root}"/NV1-B2B/2019-10-2[23]/*.avi \
+#     --root-dir "${share_root}" \
+#     --outdir sampled_frames \
+#     --neighbor-frame-count 1
+#
+# share_root='/run/user/1002/gvfs/smb-share:server=bht2stor.jax.org,share=vkumar'
+# python tools/sampleframes.py \
+#     --videos \
+#         "${share_root}"/NV16-UCSD/2019-10-09/3879434_2019-10-09_20-00-00.avi \
+#         "${share_root}"/NV16-UCSD/2019-10-11/3879436_2019-10-12_13-00-00.avi \
+#         "${share_root}"/NV16-UCSD/2019-10-14/3879439_2019-10-15_03-00-00.avi \
+#     --root-dir "${share_root}" \
+#     --outdir sampled_frames_UCSD \
+#     --neighbor-frame-count 1
+#
+# python tools/sampleframes.py \
+#     --videos \
+#         ../gaitanalysis/spot-check/LL1-1_002105-M-AX12-5.28571428571429-42640-1-S331.avi \
+#         ../gaitanalysis/spot-check/LL1-3_000690-M-MP13-8-42416-3-S080.avi \
+#         ../gaitanalysis/spot-check/LL1-3_001800-M-MP16-10-42409-3-S099.avi \
+# 
../gaitanalysis/spot-check/LL1-4_002105-F-AX12-5.28571428571429-42640-4-S329.avi \ +# ../gaitanalysis/spot-check/LL2-2_002019-M-AX30-10.2857142857143-42864-3-S420.avi \ +# ../gaitanalysis/spot-check/LL2-3_000674-F-AX18-5-42726-1-S393.avi \ +# ../gaitanalysis/spot-check/LL2-4_002105-M-AX12-5.28571428571429-42640-8-S332.avi \ +# ../gaitanalysis/spot-check/LL2-4_LP.avi \ +# ../gaitanalysis/spot-check/LL3-1_000687-M-AX11-7.71428571428571-42630-1-S320.avi \ +# ../gaitanalysis/spot-check/LL3-2_000674-F-AX18-10-42691-4-S393.avi \ +# ../gaitanalysis/spot-check/LL3-2_000687-M-AX11-6.42857142857143-42639-2-S337.avi \ +# ../gaitanalysis/spot-check/LL3-2_002019-F-AX30-8.28571428571429-42878-2-S421.avi \ +# ../gaitanalysis/spot-check/LL4-1_005314-F-AX5-9-42423-3-S137.avi \ +# ../gaitanalysis/spot-check/LL4-3_000674-M-AX18-4.71428571428571-42728-1-S395.avi \ +# ../gaitanalysis/spot-check/LL4-3_000690-F-MP13-8-42402-5-S018.avi \ +# ../gaitanalysis/spot-check/LL4-4_000676-M-AX29-10.2857142857143-42864-3-S422.avi \ +# ../gaitanalysis/spot-check/LL5-3_000928-M-AX1-8-42423-5-S123.avi \ +# ../gaitanalysis/spot-check/LL5-4_001800-F-MP16-8-42409-1-S026.avi \ +# ../gaitanalysis/spot-check/LL5-4_CAST_F.avi \ +# ../gaitanalysis/spot-check/LL6-1_000674-M-AX18-5-42726-7-S396.avi \ +# ../gaitanalysis/spot-check/LL6-1_000687-F-AX11-6.42857142857143-42639-4-S323.avi \ +# ../gaitanalysis/spot-check/LL6-2_TALLYHOJngJ.avi \ +# ../gaitanalysis/spot-check/LL6-3_000676-M-AX29-8-42409-7-S091.avi \ +# ../gaitanalysis/spot-check/LL6-3_FVB_F.avi \ +# ../gaitanalysis/spot-check/LL6-4_000687-F-AX11-7.71428571428571-42630-1-S323.avi \ +# --root-dir "../gaitanalysis/spot-check" \ +# --outdir fecal-boli-image-batch4 \ +# --frames-per-vid 1 +# +# share_root='/run/user/1002/gvfs/smb-share:server=bht2stor.jax.org,share=vkumar' +# python tools/sampleframes.py \ +# --root-dir "${share_root}" \ +# --outdir sampled_frames_strain_survey_diverse \ +# --batch ~/projects/gaitanalysis/data/metadata/strain-survey-selected-subset-batch-2019-04-18.txt + + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument( + '--videos', + nargs='+', + default=[], + help='the input videos', + ) + parser.add_argument( + '--batch', + help='batch file listing input videos (as an alternative to the videos option)' + ) + parser.add_argument( + '--root-dir', + required=True, + help='when determining video network ID this prefix root is stripped from the video name', + ) + parser.add_argument( + '--frames-per-vid', + type=int, + default=10, + help='how many frames to output per video', + ) + parser.add_argument( + '--outdir', + required=True, + help='the output directory', + ) + parser.add_argument( + '--neighbor-frame-count', + type=int, + default=0, + help='how many frames to the left and right should we also gather', + ) + parser.add_argument( + '--mark-frame', + action='store_true', + help='mark the central frame (to facilitate annotation)', + ) + + args = parser.parse_args() + + root_dir = os.path.normpath(args.root_dir) + + def process_vid(net_id, vid_fname): + print('Processing:', vid_fname) + + video_len = 0 + with imageio.get_reader(vid_fname) as reader: + video_len = reader.get_length() + if not math.isfinite(video_len): + video_len = 0 + for _ in reader: + video_len += 1 + + assert video_len >= 30 * 60, vid_fname + ' is less than a minute long' + + frames_to_sample = np.random.choice(video_len, args.frames_per_vid, replace=False) + + neigh_count = args.neighbor_frame_count + if neigh_count > 0: + all_frames_to_sample = 
sorted(set(itertools.chain.from_iterable( + range(max(f - neigh_count, 0), min(f + neigh_count + 1, video_len)) + for f in frames_to_sample))) + else: + all_frames_to_sample = sorted(frames_to_sample) + + os.makedirs(args.outdir, exist_ok=True) + with imageio.get_reader(vid_fname) as reader: + for frame_index in all_frames_to_sample: + img_data = reader.get_data(frame_index) + if args.mark_frame and frame_index in frames_to_sample: + mark_frame(img_data) + + frame_fname = '{}_{:06d}.png'.format( + net_id.replace('/', '+').replace('\\', '+'), + frame_index) + + imageio.imwrite(os.path.join(args.outdir, frame_fname), img_data) + + for vid_fname in args.videos: + net_id = os.path.relpath(os.path.normpath(vid_fname), root_dir) + + process_vid(net_id, vid_fname) + + if args.batch: + with open(args.batch, 'r') as batch: + for net_id in batch: + net_id = net_id.strip() + vid_fname = os.path.join(args.root_dir, net_id) + process_vid(net_id, vid_fname) + + +def mark_frame(img_data): + img_height, img_width, _ = img_data.shape + cv2.rectangle( + img_data, + (0, 0), + (3, 3), + (0, 0, 255), + -1, + ) + cv2.rectangle( + img_data, + (0, img_width - 1), + (3, img_width - 4), + (0, 0, 255), + -1, + ) + cv2.rectangle( + img_data, + (img_height - 1, 0), + (img_height - 4, 3), + (0, 0, 255), + -1, + ) + cv2.rectangle( + img_data, + (img_height - 1, img_width - 1), + (img_height - 4, img_width - 4), + (0, 0, 255), + -1, + ) + + +if __name__ == "__main__": + main() diff --git a/tools/testcornermodel.py b/tools/testcornermodel.py new file mode 100644 index 0000000..c9f1821 --- /dev/null +++ b/tools/testcornermodel.py @@ -0,0 +1,222 @@ +import argparse +import colorsys +import itertools +import math +import matplotlib.pyplot as plt +import numpy as np +import os +import random +import scipy.stats +import skimage.draw +import skimage.io +import torch +import torch.backends.cudnn as cudnn + +import torchvision.transforms as transforms + +import _init_paths +import utils.assocembedutil as aeutil +from config import cfg +from config import update_config +from infercorners import argmax_2d + +#from dataset.multimousepose import MultiPoseDataset, parse_poses, decompose_frame_name +from dataset.simplepointdata import parse_point_labels +import models + + +CM_PER_PIXEL = 19.5 * 2.54 / 400 + + +# Example use: +# python -u tools/testcornermodel.py \ +# --model-file output-corner/simplepoint/pose_hrnet/corner_2020-06-30_01/best_state.pth \ +# --cfg experiments/corner/corner_2020-06-30_01.yaml \ +# --cvat-files data/corner/*.xml \ +# --image-dir data/corner/corner-images \ +# --image-list data/corner/corner-val-set-LL-only.txt + +def main(): + + parser = argparse.ArgumentParser(description='test the corner model') + + parser.add_argument( + '--cvat-files', + help='list of CVAT XML files to use', + nargs='+', + required=True, + type=str, + ) + parser.add_argument( + '--image-dir', + help='directory containing images', + required=True, + type=str, + ) + parser.add_argument( + '--image-list', + help='file containing newline separated list of images to use', + default=None, + ) + parser.add_argument( + '--model-file', + help='the model file to use for inference', + required=True, + ) + parser.add_argument( + '--cfg', + help='the configuration for the model to use for inference', + required=True, + type=str, + ) + + args = parser.parse_args() + + print('=> loading configuration from {}'.format(args.cfg)) + + cfg.defrost() + cfg.merge_from_file(args.cfg) + if args.model_file: + cfg.TEST.MODEL_FILE = args.model_file + 
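# (as in the other tools here, a --model-file given on the command line
+    # overrides TEST.MODEL_FILE from the yaml config)
+    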
cfg.freeze() + + # cudnn related setting + cudnn.benchmark = cfg.CUDNN.BENCHMARK + torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC + torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED + + with torch.no_grad(): + + model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')( + cfg, is_train=False + ) + print('=> loading model from {}'.format(cfg.TEST.MODEL_FILE)) + model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False) + model.eval() + model = model.cuda() + + normalize = transforms.Normalize( + mean=[0.485], std=[0.229] + ) + xform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize( + mean=[0.45, 0.45, 0.45], + std=[0.225, 0.225, 0.225], + ), + ]) + + image_list_filename = args.image_list + img_names = None + if image_list_filename is not None: + img_names = set() + with open(image_list_filename) as val_file: + for curr_line in val_file: + img_name = curr_line.strip() + img_names.add(img_name) + + pose_labels = list(itertools.chain.from_iterable( + parse_point_labels(f, 'corner') for f in args.cvat_files)) + if img_names is not None: + pose_labels = [p for p in pose_labels if p['image_name'] in img_names] + + point_error_dists = [] + pose_dist_avg_sum = 0 + for pose_label in pose_labels: + image_name = pose_label['image_name'] + # label_pose_instances = [ + # aeutil.PoseInstance.from_xy_tensor(t) + # for t in pose_label['pose_instances'] + # ] + print('=============================') + print('image_name:', image_name) + print('== LABELS ==') + print(pose_label['point_xy']) + # print([pi.keypoints for pi in label_pose_instances]) + + + image_path = os.path.join(args.image_dir, image_name) + + #image_data_numpy = skimage.io.imread(image_path, as_gray=True) + + #image_data = torch.from_numpy(image_data_numpy).to(torch.float32) + + image_data_numpy = skimage.io.imread(image_path) + image_data = xform(image_data_numpy) + # image_data = normalize(image_data.unsqueeze(0)).squeeze(0) + #image_data = torch.stack([image_data] * 3) + + # add a size 1 batch dimension to the image and move it to the GPU + batch_tensor = image_data.unsqueeze(0).cuda() + + x = model(batch_tensor) + + x.squeeze_(-3) + + img_h = batch_tensor.size(-2) + img_w = batch_tensor.size(-1) + + x_ul = x[:, :(img_h // 2), :(img_w // 2)] + x_ll = x[:, (img_h // 2):, :(img_w // 2)] + x_ur = x[:, :(img_h // 2), (img_w // 2):] + x_lr = x[:, (img_h // 2):, (img_w // 2):] + + maxvals_ul, preds_ul = argmax_2d(x_ul) + maxvals_ll, preds_ll = argmax_2d(x_ll) + maxvals_ur, preds_ur = argmax_2d(x_ur) + maxvals_lr, preds_lr = argmax_2d(x_lr) + + preds_ul = preds_ul.cpu().numpy().astype(np.uint16) + preds_ll = preds_ll.cpu().numpy().astype(np.uint16) + preds_ur = preds_ur.cpu().numpy().astype(np.uint16) + preds_lr = preds_lr.cpu().numpy().astype(np.uint16) + + preds_ll[..., 0] += img_h // 2 + preds_ur[..., 1] += img_w // 2 + preds_lr[..., 0] += img_h // 2 + preds_lr[..., 1] += img_w // 2 + + pred_stack = np.stack([preds_ul, preds_ll, preds_ur, preds_lr], axis=-2) + pred_stack = pred_stack[..., [-1, -2]] # go from (y, x) to (x, y) + pred_stack = np.squeeze(pred_stack, axis=0) + print('== INFERENCE ==') + print(pred_stack) + print() + + _, axs = plt.subplots(1, 2, figsize=(12, 6)) + + axs[0].imshow(skimage.io.imread(image_path, as_gray=True)) + + max_heatmap_np = x[0, ...].cpu().numpy() + # max_heatmap_np[20, 20] = 1 + axs[1].imshow(max_heatmap_np) + + image_base, _ = os.path.splitext(os.path.basename(image_name)) + plt.savefig(os.path.join( + 'temp', + 'corner', + image_base + '_corner_heatmap.png')) + + 
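+            # plt.close() below releases the figure; without it matplotlib keeps
+            # every figure alive and memory grows with each validation image.
+            # Note that the savefig above assumes the 'temp/corner' directory
+            # already exists; an os.makedirs(os.path.join('temp', 'corner'),
+            # exist_ok=True) before the loop would make this self-contained.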
plt.close()
+
+            # match each predicted corner to its nearest labeled corner,
+            # capping the error distance at max_dist
+            max_dist = np.linalg.norm(np.array([img_h / 2, img_w / 2]))
+            for i in range(4):
+                curr_best_dist = max_dist
+                curr_pred = pred_stack[i, :]
+                for j in range(4):
+                    curr_lbl = pose_label['point_xy'][j, :]
+                    curr_dist = np.linalg.norm(curr_pred - curr_lbl)
+                    if curr_dist < curr_best_dist:
+                        curr_best_dist = curr_dist
+                point_error_dists.append(curr_best_dist)
+
+            print(point_error_dists[-4:])
+
+    pixel_err_dist_sem = scipy.stats.sem(point_error_dists, axis=None, nan_policy='omit')
+    pixel_err_dist_mean = np.mean(point_error_dists)
+    print(f'Pixel MAE: {pixel_err_dist_mean:.2f} ±{pixel_err_dist_sem:.2f} {pixel_err_dist_mean * CM_PER_PIXEL:.2f} ±{pixel_err_dist_sem * CM_PER_PIXEL:.2f}')
+    print(sorted(point_error_dists, reverse=True)[:10])
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testfecalboli.py b/tools/testfecalboli.py
new file mode 100644
index 0000000..055a7a5
--- /dev/null
+++ b/tools/testfecalboli.py
@@ -0,0 +1,251 @@
+import argparse
+import imageio
+import itertools
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import skimage.io  # the io submodule must be imported explicitly; a plain 'import skimage' does not expose skimage.io
+import yaml
+
+import torch
+import torch.backends.cudnn as cudnn
+
+import _init_paths
+from config import cfg
+from config import update_config
+
+from dataset.fecalbolidata import parse_fecal_boli_labels
+from inferfecalbolicount import infer_fecal_boli_xy
+
+import models
+
+
+# Examples:
+#
+# python -u tools/testfecalboli.py \
+# --model-file output-fecal-boli/fecalboli/pose_hrnet/fecalboli_2020-06-19_02/best_state.pth \
+# --cfg experiments/fecalboli/fecalboli_2020-06-19_02.yaml \
+# --cvat-files data/fecal-boli/*.xml \
+# --image-dir data/fecal-boli/images \
+# --image-list data/fecal-boli/fecal-boli-val-set.txt
+#
+# python -u tools/testfecalboli.py \
+# --model-file output-fecal-boli/fecalboli/pose_hrnet/fecalboli_2020-06-19_02/best_state.pth \
+# --cfg experiments/fecalboli/fecalboli_2020-06-19_02.yaml \
+# --cvat-files data/fecal-boli/*.xml \
+# --image-dir data/fecal-boli/images \
+# --image-list data/fecal-boli/fecal-boli-val-set.txt \
+# --min-heatmap-val 0.3
+#
+# python -u tools/testfecalboli.py \
+# --model-file output-fecal-boli/fecalboli/pose_hrnet/fecalboli_2020-06-19_02/best_state.pth \
+# --cfg experiments/fecalboli/fecalboli_2020-06-19_02.yaml \
+# --cvat-files data/fecal-boli/*.xml \
+# --image-dir data/fecal-boli/images \
+# --image-list data/fecal-boli/fecal-boli-val-set.txt \
+# --min-heatmap-val 0.3 \
+# --image-out-dir temp11
+
+
+def gen_valid_point_combos(lbl_xy_list, inf_xy_list, max_dist):
+
+    for lbl_xy in lbl_xy_list:
+        for inf_xy in inf_xy_list:
+
+            dist = np.linalg.norm(lbl_xy - inf_xy)
+            if dist <= max_dist:
+                yield {
+                    'lbl_xy': lbl_xy,
+                    'inf_xy': inf_xy,
+                    'lbl_xy_tuple': tuple(lbl_xy),
+                    'inf_xy_tuple': tuple(inf_xy),
+                    'dist': dist,
+                }
+
+
+def render_overlays(raw_image, image_out_file, true_pos_xys, false_pos_xys, false_neg_xys):
+
+    fig = plt.figure(figsize=(12, 12))
+    ax = fig.gca()
+
+    for curr_xy in true_pos_xys:
+        ax.add_artist(plt.Circle(curr_xy, 10, color='g', fill=False))
+    for curr_xy in false_pos_xys:
+        ax.add_artist(plt.Circle(curr_xy, 10, color='r', fill=False))
+    for curr_xy in false_neg_xys:
+        ax.add_artist(plt.Circle(curr_xy, 10, color='y', fill=False))
+
+    plt.imshow(raw_image)
+    plt.axis('off')
+    plt.tight_layout()
+    plt.savefig(image_out_file)
+    plt.close(fig)  # release the figure so repeated calls don't accumulate open figures
+
+
+def main():
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument(
+        '--model-file',
+        help='the model file to use for inference',
+    )
+    parser.add_argument(
+        '--cvat-files',
+        help='list of CVAT
XML files to use', + nargs='+', + required=True, + type=str, + ) + parser.add_argument( + '--image-dir', + help='directory containing images', + required=True, + type=str, + ) + parser.add_argument( + '--image-list', + help='file containing newline separated list of images to use', + default=None, + ) + parser.add_argument( + '--cfg', + help='the configuration for the model to use for inference', + ) + parser.add_argument( + '--min-heatmap-val', + type=float, + default=0.75, + ) + parser.add_argument( + '--max-dist-px', + type=float, + default=5.0, + ) + parser.add_argument( + '--image-out-dir', + type=str, + ) + + args = parser.parse_args() + + cfg.defrost() + cfg.merge_from_file(args.cfg) + cfg.TEST.MODEL_FILE = args.model_file + cfg.freeze() + + # cudnn related setting + cudnn.benchmark = cfg.CUDNN.BENCHMARK + torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC + torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED + + if args.image_out_dir: + os.makedirs(args.image_out_dir, exist_ok=True) + + with torch.no_grad(): + + model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')( + cfg, is_train=False + ) + # print('=> loading model from {}'.format(cfg.TEST.MODEL_FILE)) + model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False) + model.eval() + model = model.cuda() + + # normalize = transforms.Normalize( + # mean=[0.485], std=[0.229] + # ) + + image_list_filename = args.image_list + img_names = None + if image_list_filename is not None: + img_names = set() + with open(image_list_filename) as val_file: + for curr_line in val_file: + img_name = curr_line.strip() + img_names.add(img_name) + + fecal_boli_labels = list(itertools.chain.from_iterable( + parse_fecal_boli_labels(f) for f in args.cvat_files)) + if img_names is not None: + fecal_boli_labels = [ + lbl for lbl in fecal_boli_labels + if lbl['image_name'] in img_names + ] + + accuracies = [] + avg_pixel_errors = [] + precisions = [] + recalls = [] + + print('\t'.join(['Name', 'Accuracy', 'Average Pixel Error', 'Precision', 'Recall'])) + for lbl in fecal_boli_labels: + + image_path = os.path.join(args.image_dir, lbl['image_name']) + image_data_numpy = skimage.io.imread(image_path, as_gray=False) + + inf_xy_vals = infer_fecal_boli_xy( + model, + [image_data_numpy], + args.min_heatmap_val, + ) + + inf_xy_list = list(next(inf_xy_vals).numpy()) + lbl_xy_list = list(lbl['fecal_boli_xy']) + + point_combos = gen_valid_point_combos(lbl_xy_list, inf_xy_list, args.max_dist_px) + point_combos = sorted(point_combos, key=lambda pc: pc['dist']) + + labels_found = set() + infs_found = set() + best_point_combos = [] + for pc in point_combos: + if pc['lbl_xy_tuple'] not in labels_found and pc['inf_xy_tuple'] not in infs_found: + labels_found.add(pc['lbl_xy_tuple']) + infs_found.add(pc['inf_xy_tuple']) + best_point_combos.append(pc) + + true_pos = len(best_point_combos) + false_neg = len(lbl_xy_list) - true_pos + false_pos = len(inf_xy_list) - true_pos + + acc = true_pos / (true_pos + false_neg + false_pos) + avg_pixel_err = np.mean([pc['dist'] for pc in best_point_combos]) + precision = true_pos / (true_pos + false_pos) + recall = true_pos / (true_pos + false_neg) + + accuracies.append(acc) + avg_pixel_errors.append(avg_pixel_err) + precisions.append(precision) + recalls.append(recall) + + if args.image_out_dir: + true_pos_xys = [pc['inf_xy'] for pc in best_point_combos] + false_pos_xys = [(x, y) for x, y in inf_xy_list if (x, y) not in infs_found] + false_neg_xys = [(x, y) for x, y in lbl_xy_list if (x, y) not in labels_found] + 
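+                # Color key for the overlay written below (see render_overlays):
+                # green = true positive (matched detection), red = false positive
+                # (unmatched detection), yellow = false negative (unmatched label)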
image_name_root, image_name_ext = os.path.splitext(lbl['image_name'])
+                image_out_file = os.path.join(args.image_out_dir, image_name_root + '_fb_validation.png')
+                render_overlays(
+                    image_data_numpy,
+                    image_out_file,
+                    true_pos_xys,
+                    false_pos_xys,
+                    false_neg_xys,
+                )
+
+            print('\t'.join([
+                lbl['image_name'],
+                str(acc),
+                str(avg_pixel_err),
+                str(precision),
+                str(recall),
+            ]))
+
+    print('\t'.join([
+        'total avg',
+        str(np.mean(accuracies)),
+        str(np.mean(avg_pixel_errors)),
+        str(np.mean(precisions)),
+        str(np.mean(recalls)),
+    ]))
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testfecalbolidata.py b/tools/testfecalbolidata.py
new file mode 100644
index 0000000..962f90b
--- /dev/null
+++ b/tools/testfecalbolidata.py
@@ -0,0 +1,121 @@
+import argparse
+import itertools
+import matplotlib.pyplot as plt
+import numpy as np
+import random
+import skimage.draw as skidraw
+
+import torchvision.transforms as transforms
+
+import _init_paths
+from config import cfg
+from config import update_config
+
+from dataset.fecalbolidata import FecalBoliDataset, parse_fecal_boli_labels
+import models
+
+
+# Example:
+#
+# python -u tools/testfecalbolidata.py \
+# --cfg experiments/fecalboli/fecalboli_2020-06-19_01.yaml \
+# --cvat-files data/fecal-boli/*.xml \
+# --image-dir data/fecal-boli/images
+
+def main():
+    parser = argparse.ArgumentParser(description='test the fecal boli dataset')
+
+    parser.add_argument('--cvat-files',
+                        help='list of CVAT XML files to use',
+                        nargs='+',
+                        required=True,
+                        type=str)
+    parser.add_argument('--image-dir',
+                        help='directory containing images',
+                        required=True,
+                        type=str)
+    parser.add_argument('--cfg',
+                        help='experiment configure file name',
+                        required=True,
+                        type=str)
+    parser.add_argument('opts',
+                        help="Modify config options using the command-line",
+                        default=None,
+                        nargs=argparse.REMAINDER)
+
+    parser.add_argument('--modelDir',
+                        help='model directory',
+                        type=str,
+                        default='')
+    parser.add_argument('--logDir',
+                        help='log directory',
+                        type=str,
+                        default='')
+    parser.add_argument('--dataDir',
+                        help='data directory',
+                        type=str,
+                        default='')
+    parser.add_argument('--prevModelDir',
+                        help='prev Model directory',
+                        type=str,
+                        default='')
+
+    args = parser.parse_args()
+
+    update_config(cfg, args)
+
+    normalize = transforms.Normalize(
+        mean=[0.485], std=[0.229]
+    )
+
+    all_labels = list(itertools.chain.from_iterable(parse_fecal_boli_labels(f) for f in args.cvat_files))
+    mpose_ds = FecalBoliDataset(
+        cfg,
+        args.image_dir,
+        all_labels,
+        True,
+        normalize,
+    )
+
+    for _ in range(100):
+        i = random.randrange(len(mpose_ds))
+        print('doing', i)
+        item = mpose_ds[i]
+
+        print("item['image'].shape:", item['image'].shape)
+
+        image = item['image'].numpy()
+
+        chan_count = image.shape[0]
+        plt_rows = 1
+        plt_cols = 2
+        fig = plt.figure(figsize=(16, 8))
+
+        for chan_index in range(chan_count):
+            fig.add_subplot(plt_rows, plt_cols, chan_index + 1)
+            plt.imshow(image[chan_index, ...], cmap='gray')
+
+        fig.add_subplot(plt_rows, plt_cols, chan_count + 1)
+        plt.imshow(item['heatmap'].numpy().max(0))
+
+        # pose_instances = item['pose_instances'][:item['instance_count'], ...]
+ # inst_image = np.zeros([image.shape[1], image.shape[2], 3], dtype=np.float32) + # inst_image_counts = np.zeros([image.shape[1], image.shape[2]], dtype=np.uint8) + # for instance_index, pose_instance in enumerate(pose_instances): + # for xy_point in pose_instance: + # temp_inst_image = np.zeros([image.shape[1], image.shape[2], 3], dtype=np.float32) + # rr, cc = skidraw.circle(xy_point[1], xy_point[0], 10, inst_image.shape) + # skidraw.set_color(temp_inst_image, (rr, cc), colors[instance_index]) + # inst_image_counts[rr, cc] += 1 + # inst_image += temp_inst_image + # inst_image /= np.expand_dims(inst_image_counts, 2) + + # fig.add_subplot(plt_rows, plt_cols, chan_count + 2) + # plt.imshow(inst_image * np.expand_dims(item['joint_heatmaps'].numpy().max(0), 2)) + # plt.show() + + plt.savefig('testfbdata/img{}.png'.format(i)) + + +if __name__ == "__main__": + main() diff --git a/tools/testmouseposemodel.py b/tools/testmouseposemodel.py new file mode 100644 index 0000000..16bd69f --- /dev/null +++ b/tools/testmouseposemodel.py @@ -0,0 +1,250 @@ +import argparse +import h5py +import numpy as np +import scipy.stats +import yaml + +import matplotlib.pyplot as plt + +import torch +import torch.nn.parallel +import torch.nn.functional as torchfunc +import torch.multiprocessing as mp +import torch.backends.cudnn as cudnn +import torch.optim +import torch.utils.data +import torch.utils.data.distributed +import torchvision.transforms as transforms + +import _init_paths +from config import cfg +from core.inference import get_final_preds +from core.inference import get_max_preds + +import dataset +import models + +CM_PER_PIXEL = 19.5 * 2.54 / 400 + + +NOSE_INDEX = 0 + +LEFT_EAR_INDEX = 1 +RIGHT_EAR_INDEX = 2 + +BASE_NECK_INDEX = 3 + +LEFT_FRONT_PAW_INDEX = 4 +RIGHT_FRONT_PAW_INDEX = 5 + +CENTER_SPINE_INDEX = 6 + +LEFT_REAR_PAW_INDEX = 7 +RIGHT_REAR_PAW_INDEX = 8 + +BASE_TAIL_INDEX = 9 +MID_TAIL_INDEX = 10 +TIP_TAIL_INDEX = 11 + +INDEX_NAMES = [ + 'Nose', + + 'Left Ear', + 'Right Ear', + + 'Base Neck', + + 'Left Front Paw', + 'Right Front Paw', + + 'Center Spine', + + 'Left Rear Paw', + 'Right Rear Paw', + + 'Base Tail', + 'Mid Tail', + 'Tip Tail', +] + +# Examples: +# +# python -u tools/testmouseposemodel.py \ +# --model-file ../pose-est-env/pose-est-model.pth \ +# ../pose-est-env/pose-est-conf.yaml +# +# python -u tools/testmouseposemodel.py \ +# --model-file ../pose-est-env/pose-est-model.pth \ +# --category-yaml data/hdf5mouse/merged_pose_annos_mouse_categories_2019-06-26.yaml \ +# --category-count-cap 200 \ +# ../pose-est-env/pose-est-conf.yaml +# +# python -u tools/testmouseposemodel.py \ +# --model-file ../pose-est-env/pose-est-model.pth \ +# --category-yaml data/hdf5mouse/diverse-strain-poses-categories.yaml \ +# --dataset-root data/hdf5mouse/diverse-strain-poses.h5 \ +# ../pose-est-env/pose-est-conf.yaml +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument( + '--model-file', + help='the model file to use for inference', + default=None, + ) + + parser.add_argument( + '--dataset-root', + help='the dataset to use for inference', + default=None, + ) + + parser.add_argument( + '--category-yaml', + help='a YAML file describing which category the validation images fall into', + default=None, + ) + + parser.add_argument( + '--category-count-cap', + help='if this is selected we shuffle then cap the count in each category', + type=int, + required=False, + ) + + parser.add_argument( + 'cfg', + help='the configuration for the model to use for inference', + ) + + args = 
parser.parse_args() + + print('=> loading configuration from {}'.format(args.cfg)) + + cfg.defrost() + cfg.merge_from_file(args.cfg) + if args.model_file: + cfg.TEST.MODEL_FILE = args.model_file + if args.dataset_root: + cfg.DATASET.ROOT = args.dataset_root + cfg.freeze() + + name_category_map = dict() + if args.category_yaml: + with open(args.category_yaml, 'r') as category_yaml_file: + category_dict_list = yaml.safe_load(category_yaml_file) + for category_dict in category_dict_list: + for group_name in category_dict['group_names']: + # print(category_dict['category_name'], group_name) + name_category_map[group_name] = category_dict['category_name'] + + # cudnn related setting + cudnn.benchmark = cfg.CUDNN.BENCHMARK + torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC + torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED + + model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')( + cfg, is_train=False + ) + print('=> loading model from {}'.format(cfg.TEST.MODEL_FILE)) + model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False) + model.eval() + model = model.cuda() + + xform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize( + mean=[0.45], + std=[0.225], + ), + ]) + + with torch.no_grad(): + + category_pixel_err_dists = dict() + with h5py.File(cfg.DATASET.ROOT, 'r') as hdf5file: + + for name, group in hdf5file[cfg.DATASET.TEST_SET].items(): + category = 'default' + if name in name_category_map: + category = name_category_map[name] + + # print('NAME:', name, category) + if category not in category_pixel_err_dists: + category_pixel_err_dists[category] = [] + + if 'frames' in group and 'points' in group: + points = group['points'] + for grp_frame_index in range(points.shape[0]): + grp_frame_pts = points[grp_frame_index, ...] + + data_numpy = group['frames'][grp_frame_index, ...] 
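+                        # Shape flow for the lines below (a sketch; frames appear
+                        # to be stored (H, W, 1) in the HDF5 file, hence the squeeze):
+                        #   (H, W, 1) -squeeze-> (H, W) -xform-> (1, H, W) -squeeze-> (H, W)
+                        #   -stack x3-> (3, H, W) -unsqueeze-> (1, 3, H, W) for the model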
+                        data_numpy = data_numpy.squeeze(2)
+                        data = xform(data_numpy).squeeze(0)
+                        data = data.cuda()
+                        data = torch.stack([data] * 3)
+                        data = data.unsqueeze(0)
+
+                        # print(grp_frame_pts.shape)
+                        # print(data.shape)
+
+                        inf_out = model(data)
+                        in_out_ratio = data.size(-1) // inf_out.size(-1)
+                        if in_out_ratio == 4:
+                            # print('need to upscale')
+                            inf_out = torchfunc.upsample(inf_out, scale_factor=4, mode='bicubic', align_corners=False)
+                        inf_out = inf_out.cpu().numpy()
+                        # print('inf_out.shape:', inf_out.shape)
+
+                        preds, maxvals = get_max_preds(inf_out)
+                        preds = preds.astype(np.uint16).squeeze(0)
+                        maxvals = maxvals.squeeze(2).squeeze(0)
+
+                        pixel_err = preds.astype(np.float32) - grp_frame_pts
+                        pixel_err_dist = np.linalg.norm(pixel_err, ord=2, axis=1)
+                        category_pixel_err_dists[category].append(pixel_err_dist)
+
+        rng = np.random.default_rng(1111)
+        for category, pixel_err_dists in category_pixel_err_dists.items():
+
+            if args.category_count_cap is not None:
+                rng.shuffle(pixel_err_dists)
+                pixel_err_dists = pixel_err_dists[:args.category_count_cap]
+
+            print()
+            print('=======================')
+            print('DATA CATEGORY:', category, 'COUNT:', len(pixel_err_dists))
+
+            pixel_err_dists = np.stack(pixel_err_dists)
+
+            pixel_err_dist_mean = np.nanmean(pixel_err_dists)
+            pixel_err_dist_sem = scipy.stats.sem(pixel_err_dists, axis=None, nan_policy='omit')
+
+            pixel_err_dist_means = np.nanmean(pixel_err_dists, axis=0)
+            pixel_dist_sems = scipy.stats.sem(pixel_err_dists, axis=0, nan_policy='omit')
+
+            print(pixel_err_dist_mean)
+            print(pixel_err_dist_sem)
+            print(f'OVERALL MAE: {pixel_err_dist_mean:.2f} ±{pixel_err_dist_sem:.2f} {pixel_err_dist_mean * CM_PER_PIXEL:.2f} ±{pixel_err_dist_sem * CM_PER_PIXEL:.2f}')
+            print()
+
+            print(f'NOSE Pixel MAE: {pixel_err_dist_means[NOSE_INDEX]:.2f} ±{pixel_dist_sems[NOSE_INDEX]:.2f} {pixel_err_dist_means[NOSE_INDEX] * CM_PER_PIXEL:.2f} ±{pixel_dist_sems[NOSE_INDEX] * CM_PER_PIXEL:.2f}')
+            print(f'LEFT_EAR Pixel MAE: {pixel_err_dist_means[LEFT_EAR_INDEX]:.2f} ±{pixel_dist_sems[LEFT_EAR_INDEX]:.2f} {pixel_err_dist_means[LEFT_EAR_INDEX] * CM_PER_PIXEL:.2f} ±{pixel_dist_sems[LEFT_EAR_INDEX] * CM_PER_PIXEL:.2f}')
+            print(f'RIGHT_EAR Pixel MAE: {pixel_err_dist_means[RIGHT_EAR_INDEX]:.2f} ±{pixel_dist_sems[RIGHT_EAR_INDEX]:.2f} {pixel_err_dist_means[RIGHT_EAR_INDEX] * CM_PER_PIXEL:.2f} ±{pixel_dist_sems[RIGHT_EAR_INDEX] * CM_PER_PIXEL:.2f}')
+            print(f'BASE_NECK Pixel MAE: {pixel_err_dist_means[BASE_NECK_INDEX]:.2f} ±{pixel_dist_sems[BASE_NECK_INDEX]:.2f} {pixel_err_dist_means[BASE_NECK_INDEX] * CM_PER_PIXEL:.2f} ±{pixel_dist_sems[BASE_NECK_INDEX] * CM_PER_PIXEL:.2f}')
+            print(f'LEFT_FRONT_PAW Pixel MAE: {pixel_err_dist_means[LEFT_FRONT_PAW_INDEX]:.2f} ±{pixel_dist_sems[LEFT_FRONT_PAW_INDEX]:.2f} {pixel_err_dist_means[LEFT_FRONT_PAW_INDEX] * CM_PER_PIXEL:.2f} ±{pixel_dist_sems[LEFT_FRONT_PAW_INDEX] * CM_PER_PIXEL:.2f}')
+            print(f'RIGHT_FRONT_PAW Pixel MAE: {pixel_err_dist_means[RIGHT_FRONT_PAW_INDEX]:.2f} ±{pixel_dist_sems[RIGHT_FRONT_PAW_INDEX]:.2f} {pixel_err_dist_means[RIGHT_FRONT_PAW_INDEX] * CM_PER_PIXEL:.2f} ±{pixel_dist_sems[RIGHT_FRONT_PAW_INDEX] * CM_PER_PIXEL:.2f}')
+            print(f'CENTER_SPINE Pixel MAE: {pixel_err_dist_means[CENTER_SPINE_INDEX]:.2f} ±{pixel_dist_sems[CENTER_SPINE_INDEX]:.2f} {pixel_err_dist_means[CENTER_SPINE_INDEX] * CM_PER_PIXEL:.2f} ±{pixel_dist_sems[CENTER_SPINE_INDEX] * CM_PER_PIXEL:.2f}')
print(f'LEFT_REAR_PAW Pixel MAE: {pixel_err_dist_means[LEFT_REAR_PAW_INDEX]:.2f} ±{pixel_dist_sems[LEFT_REAR_PAW_INDEX]:.2f} {pixel_err_dist_means[LEFT_REAR_PAW_INDEX] * CM_PER_PIXEL:.2f} ±{pixel_dist_sems[LEFT_REAR_PAW_INDEX] * CM_PER_PIXEL:.2f}') + print(f'RIGHT_REAR_PAW Pixel MAE: {pixel_err_dist_means[RIGHT_REAR_PAW_INDEX]:.2f} ±{pixel_dist_sems[RIGHT_REAR_PAW_INDEX]:.2f} {pixel_err_dist_means[RIGHT_REAR_PAW_INDEX] * CM_PER_PIXEL:.2f} ±{pixel_dist_sems[RIGHT_REAR_PAW_INDEX] * CM_PER_PIXEL:.2f}') + print(f'BASE_TAIL Pixel MAE: {pixel_err_dist_means[BASE_TAIL_INDEX]:.2f} ±{pixel_dist_sems[BASE_TAIL_INDEX]:.2f} {pixel_err_dist_means[BASE_TAIL_INDEX] * CM_PER_PIXEL:.2f} ±{pixel_dist_sems[BASE_TAIL_INDEX] * CM_PER_PIXEL:.2f}') + print(f'MID_TAIL Pixel MAE: {pixel_err_dist_means[MID_TAIL_INDEX]:.2f} ±{pixel_dist_sems[MID_TAIL_INDEX]:.2f} {pixel_err_dist_means[MID_TAIL_INDEX] * CM_PER_PIXEL:.2f} ±{pixel_dist_sems[MID_TAIL_INDEX] * CM_PER_PIXEL:.2f}') + print(f'TIP_TAIL Pixel MAE: {pixel_err_dist_means[TIP_TAIL_INDEX]:.2f} ±{pixel_dist_sems[TIP_TAIL_INDEX]:.2f} {pixel_err_dist_means[TIP_TAIL_INDEX] * CM_PER_PIXEL:.2f} ±{pixel_dist_sems[TIP_TAIL_INDEX] * CM_PER_PIXEL:.2f}') + print() + + +if __name__ == "__main__": + main() diff --git a/tools/testmultimousedata.py b/tools/testmultimousedata.py new file mode 100644 index 0000000..549ad93 --- /dev/null +++ b/tools/testmultimousedata.py @@ -0,0 +1,137 @@ +import argparse +import colorsys +import itertools +import matplotlib.pyplot as plt +import numpy as np +import random +import skimage.draw as skidraw + +import torchvision.transforms as transforms + +import _init_paths +from config import cfg +from config import update_config + +from dataset.multimousepose import MultiPoseDataset, parse_poses +import models + + +# Example: +# +# python -u tools/testmultimousedata.py \ +# --cfg /home/sheppk/projects/deep-high-resolution-net.pytorch/experiments/multimouse/multimouse-1.yaml +# --cvat-files \ +# /run/user/1002/gvfs/smb-share:server=bht2stor.jax.org,share=vkumar/kumarlab-new/Brian/NeuralNets/MultiMousePose/Annotations/*.xml \ +# /run/user/1002/gvfs/smb-share:server=bht2stor.jax.org,share=vkumar/kumarlab-new/Brian/NeuralNets/MultiMousePose/Annotations_NoMarkings/*.xml \ +# --image-dir '/run/user/1002/gvfs/smb-share:server=bht2stor.jax.org,share=vkumar/kumarlab-new/Brian/NeuralNets/MultiMousePose/Dataset' + +def random_colors(N, bright=True): + """ + Generate random colors. + To get visually distinct colors, generate them in HSV space then + convert to RGB. 
+ """ + brightness = 1.0 if bright else 0.7 + hsv = [(i / N, 1, brightness) for i in range(N)] + colors = [colorsys.hsv_to_rgb(*c) for c in hsv] + #random.shuffle(colors) + return colors + + +def main(): + parser = argparse.ArgumentParser(description='test the multimouse pose dataset') + + parser.add_argument('--cvat-files', + help='list of CVAT XML files to use', + nargs='+', + required=True, + type=str) + parser.add_argument('--image-dir', + help='directory containing images', + required=True, + type=str) + parser.add_argument('--cfg', + help='experiment configure file name', + required=True, + type=str) + parser.add_argument('opts', + help="Modify config options using the command-line", + default=None, + nargs=argparse.REMAINDER) + + parser.add_argument('--modelDir', + help='model directory', + type=str, + default='') + parser.add_argument('--logDir', + help='log directory', + type=str, + default='') + parser.add_argument('--dataDir', + help='data directory', + type=str, + default='') + parser.add_argument('--prevModelDir', + help='prev Model directory', + type=str, + default='') + + args = parser.parse_args() + + update_config(cfg, args) + + normalize = transforms.Normalize( + mean=[0.485], std=[0.229] + ) + + all_poses = list(itertools.chain.from_iterable(parse_poses(f) for f in args.cvat_files)) + mpose_ds = MultiPoseDataset( + cfg, + args.image_dir, + all_poses, + True, + normalize, + ) + + colors = random_colors(10) + + for _ in range(100): + i = random.randrange(len(mpose_ds)) + print('doing', i) + item = mpose_ds[i] + + print("item['image'].shape:", item['image'].shape) + + image = item['image'].numpy() + + chan_count = image.shape[0] + plt_rows = 1 if chan_count == 1 else 2 + plt_cols = 3 + fig = plt.figure(figsize=(8, 8)) + + for chan_index in range(chan_count): + fig.add_subplot(plt_rows, plt_cols, chan_index + 1) + plt.imshow(image[chan_index, ...], cmap='gray') + + fig.add_subplot(plt_rows, plt_cols, chan_count + 1) + plt.imshow(item['joint_heatmaps'].numpy().max(0)) + + pose_instances = item['pose_instances'][:item['instance_count'], ...] 
+        inst_image = np.zeros([image.shape[1], image.shape[2], 3], dtype=np.float32)
+        inst_image_counts = np.zeros([image.shape[1], image.shape[2]], dtype=np.uint8)
+        for instance_index, pose_instance in enumerate(pose_instances):
+            for xy_point in pose_instance:
+                temp_inst_image = np.zeros([image.shape[1], image.shape[2], 3], dtype=np.float32)
+                rr, cc = skidraw.circle(xy_point[1], xy_point[0], 10, inst_image.shape)
+                skidraw.set_color(temp_inst_image, (rr, cc), colors[instance_index])
+                inst_image_counts[rr, cc] += 1
+                inst_image += temp_inst_image
+        inst_image /= np.expand_dims(inst_image_counts, 2)
+
+        fig.add_subplot(plt_rows, plt_cols, chan_count + 2)
+        plt.imshow(inst_image * np.expand_dims(item['joint_heatmaps'].numpy().max(0), 2))
+        plt.show()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/testmultimouseinference.py b/tools/testmultimouseinference.py
new file mode 100644
index 0000000..91d9e6d
--- /dev/null
+++ b/tools/testmultimouseinference.py
@@ -0,0 +1,377 @@
+import argparse
+import colorsys
+import itertools
+import math
+import matplotlib.pyplot as plt
+import numpy as np
+import os
+import random
+import skimage.draw
+import skimage.io
+import torch
+import torch.backends.cudnn as cudnn
+
+import torchvision.transforms as transforms
+
+import _init_paths
+import utils.assocembedutil as aeutil
+from config import cfg
+from config import update_config
+
+from dataset.multimousepose import MultiPoseDataset, parse_poses, decompose_frame_name
+import models
+
+
+KEYPOINT_COUNT = 12
+
+def capped_pose_distance(pose1, pose2, dist_cap):
+
+    """
+    Returns an average pixel distance between two poses along with a count of
+    the corresponding point pairs that are <= the given distance cap. Distances
+    are taken between corresponding points of the two poses. If a pair of
+    corresponding points is farther apart than dist_cap, or if either pose is
+    missing the point, then dist_cap is used as that pairing's distance.
+    """
+
+    total_distance = 0
+    point_count = 0
+
+    for joint_index, pose1_keypoint in pose1.keypoints.items():
+        if joint_index in pose2.keypoints:
+            pose2_keypoint = pose2.keypoints[joint_index]
+            curr_dist = aeutil.xy_dist(pose1_keypoint, pose2_keypoint)
+            if curr_dist <= dist_cap:
+                total_distance += curr_dist
+                point_count += 1
+
+    avg_distance = (total_distance + dist_cap * (KEYPOINT_COUNT - point_count)) / KEYPOINT_COUNT
+
+    return avg_distance, point_count
+
+
+def random_colors(N, bright=True):
+    """
+    Generate random colors.
+    To get visually distinct colors, generate them in HSV space then
+    convert to RGB.
+ """ + brightness = 1.0 if bright else 0.7 + hsv = [(i / N, 1, brightness) for i in range(N)] + colors = [colorsys.hsv_to_rgb(*c) for c in hsv] + #random.shuffle(colors) + return colors + + +# Example use: +# python -u tools/testmultimouseinference.py \ +# --cfg experiments/multimouse/multimouse-4.yaml \ +# --model-file output-multi-mouse/multimousepose/pose_hrnet/multimouse-4/best_state.pth \ +# --cvat-files /run/user/1002/gvfs/smb-share:server=bht2stor.jax.org,share=vkumar/kumarlab-new/Brian/NeuralNets/MultiMousePose/Annotations/*.xml \ +# --image-dir '/run/user/1002/gvfs/smb-share:server=bht2stor.jax.org,share=vkumar/kumarlab-new/Brian/NeuralNets/MultiMousePose/Dataset' \ +# --image-list data/multi-mouse-val-set.txt \ +# --image-out-dir image-out \ +# --plot-heatmap \ +# --dist-out-file output/dist-out.txt + +def main(): + + colors = random_colors(6) + + parser = argparse.ArgumentParser(description='test the multimouse pose dataset') + + parser.add_argument( + '--cvat-files', + help='list of CVAT XML files to use', + nargs='+', + required=True, + type=str, + ) + parser.add_argument( + '--image-dir', + help='directory containing images', + required=True, + type=str, + ) + parser.add_argument( + '--image-list', + help='file containing newline separated list of images to use', + default=None, + ) + parser.add_argument( + '--model-file', + help='the model file to use for inference', + required=True, + ) + parser.add_argument( + '--confidence-threshold', + help='minimum confidence threshold to test', + default=0.0, + type=float, + ) + parser.add_argument( + '--cfg', + help='the configuration for the model to use for inference', + required=True, + type=str, + ) + # TODO we should change this to cm units rather than pixels + parser.add_argument( + '--max-inst-dist-px', + help='maximum keypoint separation distance in pixels. For a keypoint to ' + 'be added to an instance there must be at least one point in the ' + 'instance which is within this number of pixels.', + type=int, + default=150, + ) + parser.add_argument( + '--max-embed-sep-within-instances', + help='maximum embedding separation allowed for a joint to be added to an existing ' + 'instance within the max distance separation', + type=float, + default=0.2, + ) + parser.add_argument( + '--min-embed-sep-between-instances', + help='if two joints of the the same type (eg. both right ear) are within the max ' + 'distance separation and their embedding separation doesn\'t meet or ' + 'exceed this threshold only the point with the highest heatmap value is kept.', + type=float, + default=0.1, + ) + parser.add_argument( + '--min-pose-heatmap-val', + type=float, + default=0.4, + ) + parser.add_argument( + '--image-out-dir', + help='the directory we plot to', + ) + parser.add_argument( + '--plot-heatmap', + action='store_true', + help='indicates that the heatmap should be included in generated image output', + ) + parser.add_argument( + '--minimum-keypoint-count', + type=int, + default=6, + help='the minimum number of points required before a pose is considered valid.' 
+ ' Poses with fewer points will be discarded.', + ) + parser.add_argument( + '--pose-dist-cap-px', + type=int, + default=15, + help='each pose keypoints distance value will be capped by this argument, So' + ' distances greater than this cap will be set to the cap value.', + ) + parser.add_argument( + '--dist-out-file', + help='append the mean average distance to this file', + ) + + args = parser.parse_args() + + # shorten some args + min_embed_sep = args.min_embed_sep_between_instances + max_embed_sep = args.max_embed_sep_within_instances + max_inst_dist = args.max_inst_dist_px + + if args.image_out_dir is not None: + os.makedirs(args.image_out_dir, exist_ok=True) + + print('=> loading configuration from {}'.format(args.cfg)) + + cfg.defrost() + cfg.merge_from_file(args.cfg) + if args.model_file: + cfg.TEST.MODEL_FILE = args.model_file + cfg.freeze() + + # cudnn related setting + cudnn.benchmark = cfg.CUDNN.BENCHMARK + torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC + torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED + + model_extra = cfg.MODEL.EXTRA + use_neighboring_frames = False + if 'USE_NEIGHBORING_FRAMES' in model_extra: + use_neighboring_frames = model_extra['USE_NEIGHBORING_FRAMES'] + + with torch.no_grad(): + + model = eval('models.' + cfg.MODEL.NAME + '.get_pose_net')( + cfg, is_train=False + ) + print('=> loading model from {}'.format(cfg.TEST.MODEL_FILE)) + model.load_state_dict(torch.load(cfg.TEST.MODEL_FILE), strict=False) + model.eval() + model = model.cuda() + + normalize = transforms.Normalize( + mean=[0.485], std=[0.229] + ) + + image_list_filename = args.image_list + img_names = None + if image_list_filename is not None: + img_names = set() + with open(image_list_filename) as val_file: + for curr_line in val_file: + img_name = curr_line.strip() + img_names.add(img_name) + + pose_labels = list(itertools.chain.from_iterable(parse_poses(f) for f in args.cvat_files)) + if img_names is not None: + pose_labels = [p for p in pose_labels if p['image_name'] in img_names] + + pose_dist_avg_sum = 0 + for pose_label in pose_labels: + image_name = pose_label['image_name'] + # pose_instances = pose_label['pose_instances'] + label_pose_instances = [ + aeutil.PoseInstance.from_xy_tensor(t) + for t in pose_label['pose_instances'] + ] + print('image_name:', image_name) + print('== {} POSE INSTANCES FROM LABELS =='.format(len(label_pose_instances))) + # print([pi.keypoints for pi in label_pose_instances]) + + + image_path = os.path.join(args.image_dir, image_name) + + if use_neighboring_frames: + print("USE NEIGHBOR FRAMES") + vid_fragment, frame_index = decompose_frame_name(image_path) + prev_frame_path = '{}_{}.png'.format(vid_fragment, frame_index - 1) + next_frame_path = '{}_{}.png'.format(vid_fragment, frame_index + 1) + + image_data_list = [] + for i, path in enumerate([prev_frame_path, image_path, next_frame_path]): + curr_image_data_numpy = skimage.io.imread(path, as_gray=True) + if i == 1: + image_data_numpy = curr_image_data_numpy + + curr_image_data = torch.from_numpy(curr_image_data_numpy).to(torch.float32) + curr_image_data = normalize(curr_image_data.unsqueeze(0)).squeeze(0) + image_data_list.append(curr_image_data) + + image_data = torch.stack(image_data_list) + + else: + print("DONT USE NEIGHBOR FRAMES") + image_data_numpy = skimage.io.imread(image_path, as_gray=True) + + image_data = torch.from_numpy(image_data_numpy).to(torch.float32) + image_data = normalize(image_data.unsqueeze(0)).squeeze(0) + image_data = torch.stack([image_data] * 3) + + # add a size 1 
batch dimension to the image and move it to the GPU + image_data = image_data.unsqueeze(0).cuda() + + inst_pose_data = model(image_data) + joint_count = inst_pose_data.size(1) // 2 + pose_heatmaps = inst_pose_data[:, :joint_count, ...] + inst_embed_data = inst_pose_data[:, joint_count:, ...] + + pose_localmax = aeutil.localmax2D(pose_heatmaps, args.min_pose_heatmap_val, 3) + + batch_index = 0 + inferred_pose_instances = aeutil.calc_pose_instances( + pose_heatmaps[batch_index, ...], + pose_localmax[batch_index, ...], + inst_embed_data[batch_index, ...], + min_embed_sep, + max_embed_sep, + max_inst_dist) + + # filter out pose instances that have too few points + inferred_pose_instances = [ + p for p in inferred_pose_instances + if len(p.keypoints) >= args.minimum_keypoint_count] + print('== {} POSE INSTANCES FROM INFERENCE =='.format(len(inferred_pose_instances))) + # print([pi.keypoints for pi in inferred_pose_instances]) + + if args.image_out_dir is not None: + image_rgb = np.zeros([image_data_numpy.shape[0], image_data_numpy.shape[1], 3], dtype=np.float32) + image_rgb[...] = image_data_numpy[..., np.newaxis] + + for pose_index, pose_instance in enumerate(inferred_pose_instances): + for keypoint in pose_instance.keypoints.values(): + rr, cc = skimage.draw.circle( + keypoint['y_pos'], keypoint['x_pos'], + 3, + image_rgb.shape) + skimage.draw.set_color(image_rgb, (rr, cc), colors[pose_index % len(colors)]) + + if args.plot_heatmap: + _, axs = plt.subplots(1, 3, figsize=(18, 6)) + else: + _, axs = plt.subplots(1, 2, figsize=(12, 6)) + axs[0].imshow(image_rgb, aspect='equal') + + for pose_index, pose_instance in enumerate(inferred_pose_instances): + keypoints = sorted(pose_instance.keypoints.values(), key=lambda kp: kp['joint_index']) + joint_indexes = [kp['joint_index'] for kp in keypoints] + embed_vals = [kp['embed'] for kp in keypoints] + axs[1].scatter(embed_vals, joint_indexes, c=[colors[pose_index % len(colors)]]) + + if args.plot_heatmap: + curr_heatmaps = pose_heatmaps[batch_index, ...] 
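+                    # Collapse the per-joint heatmaps to one displayable image by
+                    # taking the max across the joint channel. The [20, 20] = 1
+                    # write below looks like a deliberate trick to pin one pixel
+                    # at 1.0 so imshow's color scale stays comparable across
+                    # images (a guess; it may simply be leftover debug code)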
+ max_heatmap, _ = curr_heatmaps.max(dim=0) + max_heatmap_np = max_heatmap.cpu().numpy() + max_heatmap_np[20, 20] = 1 + axs[2].imshow(max_heatmap_np) + + image_base, _ = os.path.splitext(os.path.basename(image_name)) + plt.savefig(os.path.join( + args.image_out_dir, + image_base + '_instance_pose.png')) + + plt.close() + + # match up poses by distance + pose_combos = [] + for lbl_pose_i, lbl_pose in enumerate(label_pose_instances): + for inf_pose_i, inf_pose in enumerate(inferred_pose_instances): + curr_dist, _ = capped_pose_distance(inf_pose, lbl_pose, args.pose_dist_cap_px) + pose_combos.append((lbl_pose_i, inf_pose_i, curr_dist)) + + # sort pose combinations by distance + pose_combos.sort(key=lambda pcombo: pcombo[2]) + + pose_dist_sum = 0 + lbl_pose_count = len(label_pose_instances) + inf_pose_count = len(inferred_pose_instances) + unmatched_lbl_poses = set(range(lbl_pose_count)) + unmatched_inf_poses = set(range(inf_pose_count)) + for lbl_pose_i, inf_pose_i, curr_dist in pose_combos: + if lbl_pose_i in unmatched_lbl_poses and inf_pose_i in unmatched_inf_poses: + pose_dist_sum += curr_dist + unmatched_lbl_poses.remove(lbl_pose_i) + unmatched_inf_poses.remove(inf_pose_i) + + # unmatched poses will be treated as if every point is at the capped distance + pose_count_diff = abs(inf_pose_count - lbl_pose_count) + pose_dist_sum += args.pose_dist_cap_px * pose_count_diff + + max_pose_count = max(lbl_pose_count, inf_pose_count) + pose_dist_avg = pose_dist_sum / max_pose_count + pose_dist_avg_sum += pose_dist_avg + + print('$$$$$$$$ POSE DIST:', pose_dist_avg) + print() + + pose_dist_mean_avg = pose_dist_avg_sum / len(pose_labels) + print('pose_dist_mean_avg:', pose_dist_mean_avg) + + if args.dist_out_file is not None: + with open(args.dist_out_file, 'a') as dist_out_file: + dist_out_file.write('{}\t{}\n'.format(args.cfg, pose_dist_mean_avg)) + + +if __name__ == "__main__": + main() diff --git a/tools/trainfecalboli.py b/tools/trainfecalboli.py new file mode 100644 index 0000000..57f34be --- /dev/null +++ b/tools/trainfecalboli.py @@ -0,0 +1,226 @@ +import argparse +import csv +import functools +import itertools +import os +import pprint +import random +import shutil + +import torch +import torch.backends.cudnn as cudnn +from torch.utils.tensorboard import SummaryWriter +import torchvision.transforms as transforms + +import _init_paths +from config import cfg +from config import update_config +from core.fecalbolifunc import train, validate +from core.assocembedloss import weighted_bcelogit_loss +from dataset.fecalbolidata import FecalBoliDataset, parse_fecal_boli_labels +from utils.utils import get_optimizer +from utils.utils import save_checkpoint +from utils.utils import create_logger + +import models + + +def parse_args(): + parser = argparse.ArgumentParser(description='train fecal boli detection network') + + parser.add_argument('--cvat-files', + help='list of CVAT XML files to use', + nargs='+', + required=True, + type=str) + parser.add_argument('--image-dir', + help='directory containing images', + required=True, + type=str) + parser.add_argument('--cfg', + help='experiment configure file name', + required=True, + type=str) + + parser.add_argument('opts', + help="Modify config options using the command-line", + default=None, + nargs=argparse.REMAINDER) + + parser.add_argument('--modelDir', + help='model directory', + type=str, + default='') + parser.add_argument('--logDir', + help='log directory', + type=str, + default='') + parser.add_argument('--dataDir', + help='data directory', + 
type=str, + default='') + + args = parser.parse_args() + + return args + +# python tools/trainfecalboli.py \ +# --cfg experiments/fecalboli/fecalboli_2020-05-0-08.yaml \ +# --cvat-files data/fecal-boli/*.xml \ +# --image-dir data/fecal-boli/images +def main(): + args = parse_args() + update_config(cfg, args) + + logger, final_output_dir, _ = create_logger( + cfg, args.cfg, 'train') + + logger.info(pprint.pformat(args)) + logger.info(cfg) + + swriter = SummaryWriter(os.path.join(final_output_dir, 'tb')) + + # cudnn related setting + cudnn.benchmark = cfg.CUDNN.BENCHMARK + torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC + torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED + + model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(cfg, is_train=True).cuda() + + # copy model file + this_dir = os.path.dirname(__file__) + shutil.copy2( + os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'), + final_output_dir) + + if cfg.LOSS.POSE_LOSS_FUNC == 'MSE': + criterion = torch.nn.MSELoss() + elif cfg.LOSS.POSE_LOSS_FUNC == 'WEIGHTED_BCE': + criterion = functools.partial( + weighted_bcelogit_loss, + pos_weight = cfg.LOSS.POSITIVE_LABEL_WEIGHT) + else: + raise Exception('Unknown pose loss function: {}'.format(cfg.LOSS.POSE_LOSS_FUNC)) + + # Data loading code + fecal_boli_labels = list(itertools.chain.from_iterable( + parse_fecal_boli_labels(f) for f in args.cvat_files)) + validation_set_filename = cfg.DATASET.TEST_SET + val_img_names = set() + if os.path.exists(validation_set_filename): + with open(validation_set_filename) as val_file: + for curr_line in val_file: + img_name = curr_line.strip() + val_img_names.add(img_name) + + else: + img_names = {lbl['image_name'] for lbl in fecal_boli_labels} + val_count = round(len(img_names) * cfg.DATASET.TEST_SET_PROPORTION) + val_img_names = set(random.sample(img_names, val_count)) + + logger.info("=> saving validation image names to '{}'".format(validation_set_filename)) + with open(validation_set_filename, 'w') as val_file: + for img_name in val_img_names: + val_file.write(img_name) + val_file.write('\n') + + transform = transforms.Normalize(mean=[0.485], std=[0.229]) + + train_labels = [lbl for lbl in fecal_boli_labels if lbl['image_name'] not in val_img_names] + train_ds = FecalBoliDataset( + cfg, + args.image_dir, + train_labels, + True, + transform, + ) + train_loader = torch.utils.data.DataLoader( + train_ds, + batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU, + shuffle=cfg.TRAIN.SHUFFLE, + num_workers=cfg.WORKERS, + pin_memory=cfg.PIN_MEMORY, + drop_last=True, + ) + + val_labels = [lbl for lbl in fecal_boli_labels if lbl['image_name'] in val_img_names] + val_ds = FecalBoliDataset( + cfg, + args.image_dir, + val_labels, + False, + transform, + ) + valid_loader = torch.utils.data.DataLoader( + val_ds, + batch_size=cfg.TEST.BATCH_SIZE_PER_GPU, + shuffle=False, + num_workers=cfg.WORKERS, + pin_memory=cfg.PIN_MEMORY, + ) + + logger.info("=> full data set size: {}; training/validation: {} [{}]/{} [{}]".format( + len(fecal_boli_labels), len(train_labels), len(train_ds), len(val_labels), len(val_ds))) + + best_perf = None + last_epoch = -1 + optimizer = get_optimizer(cfg, model) + begin_epoch = cfg.TRAIN.BEGIN_EPOCH + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR( + optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR, + last_epoch=last_epoch + ) + + train_table_fname = os.path.join(final_output_dir, 'training.tsv') + val_table_fname = os.path.join(final_output_dir, 'validation.tsv') + with open(train_table_fname, 'w', newline='') as train_table_f, 
\ + open(val_table_fname, 'w', newline='') as val_table_f: + + train_header = ['Epoch', 'Batch', 'Loss', 'Batch Time', 'Batch Size'] + train_table_writer = csv.DictWriter(train_table_f, fieldnames=train_header, delimiter='\t') + train_table_writer.writeheader() + + val_header = ['Epoch', 'Loss', 'Performance Indicator'] + val_table_writer = csv.DictWriter(val_table_f, fieldnames=val_header, delimiter='\t') + val_table_writer.writeheader() + + logger.info('entering epoch loop from: {} to {}'.format(begin_epoch, cfg.TRAIN.END_EPOCH)) + for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH): + + # train for one epoch + train( + cfg, + train_loader, + model, + criterion, + optimizer, + train_table_writer, + swriter, + epoch) + + # evaluate on validation set + perf_indicator = validate( + cfg, + valid_loader, + model, + criterion, + val_table_writer, + swriter, + epoch) + + if best_perf is None or perf_indicator >= best_perf: + best_perf = perf_indicator + logger.info('*** NEW BEST *** {}'.format(perf_indicator)) + best_model_state_file = os.path.join(final_output_dir, 'best_state.pth') + logger.info('=> saving best model state to {}'.format(best_model_state_file)) + torch.save(model.state_dict(), best_model_state_file) + + lr_scheduler.step() + + final_model_state_file = os.path.join(final_output_dir, 'final_state.pth') + logger.info('=> saving final model state to {}'.format(final_model_state_file)) + torch.save(model.state_dict(), final_model_state_file) + + +if __name__ == '__main__': + main() diff --git a/tools/trainmultimouse.py b/tools/trainmultimouse.py new file mode 100644 index 0000000..e34f18f --- /dev/null +++ b/tools/trainmultimouse.py @@ -0,0 +1,267 @@ +import argparse +import csv +import functools +import itertools +import os +import pprint +import random +import shutil + +import torch +import torch.backends.cudnn as cudnn +from torch.utils.tensorboard import SummaryWriter +import torchvision.transforms as transforms + +import _init_paths +from config import cfg +from config import update_config +from core.assocembedfunc import train, validate +from core.assocembedloss import PoseEstAssocEmbedLoss, balanced_bcelogit_loss, weighted_bcelogit_loss +from dataset.multimousepose import MultiPoseDataset, parse_poses +from utils.utils import get_optimizer +from utils.utils import save_checkpoint +from utils.utils import create_logger + +import models + +# Example(s): +# +# python tools/trainmultimouse.py \ +# --cfg experiments/multimouse/multimouse-1.yaml \ +# --cvat-files \ +# /run/user/1002/gvfs/smb-share:server=bht2stor.jax.org,share=vkumar/kumarlab-new/Brian/NeuralNets/MultiMousePose/Annotations/*.xml \ +# /run/user/1002/gvfs/smb-share:server=bht2stor.jax.org,share=vkumar/kumarlab-new/Brian/NeuralNets/MultiMousePose/Annotations_NoMarkings/*.xml \ +# --image-dir '/run/user/1002/gvfs/smb-share:server=bht2stor.jax.org,share=vkumar/kumarlab-new/Brian/NeuralNets/MultiMousePose/Dataset' +# +# singularity exec --nv vm/multi-mouse-pose-2019-11-04.sif python3 tools/trainmultimouse.py \ +# --cfg experiments/multimouse/multimouse_2019-12-31_1.yaml \ +# --cvat-files \ +# data/multi-mouse/Annotations/*xml \ +# data/multi-mouse/Annotations_NoMarkings/*.xml \ +# --image-dir data/multi-mouse/Dataset + +def parse_args(): + parser = argparse.ArgumentParser(description='train multi-mouse pose network') + + parser.add_argument('--cvat-files', + help='list of CVAT XML files to use', + nargs='+', + required=True, + type=str) + parser.add_argument('--image-dir', + help='directory containing images', + 
required=True, + type=str) + parser.add_argument('--cfg', + help='experiment configure file name', + required=True, + type=str) + + parser.add_argument('opts', + help="Modify config options using the command-line", + default=None, + nargs=argparse.REMAINDER) + + parser.add_argument('--modelDir', + help='model directory', + type=str, + default='') + parser.add_argument('--logDir', + help='log directory', + type=str, + default='') + parser.add_argument('--dataDir', + help='data directory', + type=str, + default='') + parser.add_argument('--prevModelDir', + help='prev Model directory', + type=str, + default='') + + args = parser.parse_args() + + return args + + +def main(): + args = parse_args() + update_config(cfg, args) + + logger, final_output_dir, _ = create_logger( + cfg, args.cfg, 'train') + + logger.info(pprint.pformat(args)) + logger.info(cfg) + + swriter = SummaryWriter(os.path.join(final_output_dir, 'tb')) + + # cudnn related setting + cudnn.benchmark = cfg.CUDNN.BENCHMARK + torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC + torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED + + model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(cfg, is_train=True).cuda() + + # copy model file + this_dir = os.path.dirname(__file__) + shutil.copy2( + os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'), + final_output_dir) + + if cfg.LOSS.POSE_LOSS_FUNC == 'MSE': + criterion = PoseEstAssocEmbedLoss( + pose_heatmap_weight = cfg.LOSS.POSE_HEATMAP_WEIGHT, + assoc_embedding_weight = cfg.LOSS.ASSOC_EMBEDDING_WEIGHT, + separation_term_weight = 5, + sigma = 5) + elif cfg.LOSS.POSE_LOSS_FUNC == 'BALANCED_BCE': + criterion = PoseEstAssocEmbedLoss( + pose_heatmap_weight = cfg.LOSS.POSE_HEATMAP_WEIGHT, + assoc_embedding_weight = cfg.LOSS.ASSOC_EMBEDDING_WEIGHT, + separation_term_weight = 5, + sigma = 5, + pose_loss_func = functools.partial( + balanced_bcelogit_loss, + fairness_quotient = cfg.LOSS.BALANCED_BCE_FAIRNESS_QUOTIENT)) + elif cfg.LOSS.POSE_LOSS_FUNC == 'WEIGHTED_BCE': + criterion = PoseEstAssocEmbedLoss( + pose_heatmap_weight = cfg.LOSS.POSE_HEATMAP_WEIGHT, + assoc_embedding_weight = cfg.LOSS.ASSOC_EMBEDDING_WEIGHT, + separation_term_weight = 5, + sigma = 5, + pose_loss_func = functools.partial( + weighted_bcelogit_loss, + pos_weight = cfg.LOSS.POSITIVE_LABEL_WEIGHT)) + else: + raise Exception('Unknown pose loss function: {}'.format(cfg.LOSS.POSE_LOSS_FUNC)) + + # Data loading code + pose_labels = list(itertools.chain.from_iterable(parse_poses(f) for f in args.cvat_files)) + validation_set_filename = cfg.DATASET.TEST_SET + val_img_names = set() + if os.path.exists(validation_set_filename): + with open(validation_set_filename) as val_file: + for curr_line in val_file: + img_name = curr_line.strip() + val_img_names.add(img_name) + + else: + img_names = {lbl['image_name'] for lbl in pose_labels} + val_count = round(len(img_names) * cfg.DATASET.TEST_SET_PROPORTION) + val_img_names = set(random.sample(img_names, val_count)) + + logger.info("=> saving validation image names to '{}'".format(validation_set_filename)) + with open(validation_set_filename, 'w') as val_file: + for img_name in val_img_names: + val_file.write(img_name) + val_file.write('\n') + + model_extra = cfg.MODEL.EXTRA + use_neighboring_frames = False + if 'USE_NEIGHBORING_FRAMES' in model_extra: + use_neighboring_frames = model_extra['USE_NEIGHBORING_FRAMES'] + + if use_neighboring_frames: + transform = transforms.Normalize(mean=[0.485] * 3, std=[0.229] * 3) + else: + transform = transforms.Normalize(mean=[0.485], 
std=[0.229]) + + train_labels = [lbl for lbl in pose_labels if lbl['image_name'] not in val_img_names] + train_ds = MultiPoseDataset( + cfg, + args.image_dir, + train_labels, + True, + transform, + ) + train_loader = torch.utils.data.DataLoader( + train_ds, + batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU, + shuffle=cfg.TRAIN.SHUFFLE, + num_workers=cfg.WORKERS, + pin_memory=cfg.PIN_MEMORY, + drop_last=True, + ) + + val_labels = [lbl for lbl in pose_labels if lbl['image_name'] in val_img_names] + val_ds = MultiPoseDataset( + cfg, + args.image_dir, + val_labels, + False, + transform, + ) + valid_loader = torch.utils.data.DataLoader( + val_ds, + batch_size=cfg.TEST.BATCH_SIZE_PER_GPU, + shuffle=False, + num_workers=cfg.WORKERS, + pin_memory=cfg.PIN_MEMORY, + ) + + logger.info("=> full data set size: {}; training/validation: {} [{}]/{} [{}]".format( + len(pose_labels), len(train_labels), len(train_ds), len(val_labels), len(val_ds))) + + best_perf = None + last_epoch = -1 + optimizer = get_optimizer(cfg, model) + begin_epoch = cfg.TRAIN.BEGIN_EPOCH + lr_scheduler = torch.optim.lr_scheduler.MultiStepLR( + optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR, + last_epoch=last_epoch + ) + + train_table_fname = os.path.join(final_output_dir, 'training.tsv') + val_table_fname = os.path.join(final_output_dir, 'validation.tsv') + with open(train_table_fname, 'w', newline='') as train_table_f, \ + open(val_table_fname, 'w', newline='') as val_table_f: + + train_header = ['Epoch', 'Batch', 'Loss', 'Batch Time', 'Batch Size'] + train_table_writer = csv.DictWriter(train_table_f, fieldnames=train_header, delimiter='\t') + train_table_writer.writeheader() + + val_header = ['Epoch', 'Loss', 'Performance Indicator'] + val_table_writer = csv.DictWriter(val_table_f, fieldnames=val_header, delimiter='\t') + val_table_writer.writeheader() + + logger.info('entering epoch loop from: {} to {}'.format(begin_epoch, cfg.TRAIN.END_EPOCH)) + for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH): + + # train for one epoch + train( + cfg, + train_loader, + model, + criterion, + optimizer, + train_table_writer, + swriter, + epoch) + + # evaluate on validation set + perf_indicator = validate( + cfg, + valid_loader, + model, + criterion, + val_table_writer, + swriter, + epoch) + + if best_perf is None or perf_indicator >= best_perf: + best_perf = perf_indicator + logger.info('*** NEW BEST *** {}'.format(perf_indicator)) + best_model_state_file = os.path.join(final_output_dir, 'best_state.pth') + logger.info('=> saving best model state to {}'.format(best_model_state_file)) + torch.save(model.state_dict(), best_model_state_file) + + lr_scheduler.step() + + final_model_state_file = os.path.join(final_output_dir, 'final_state.pth') + logger.info('=> saving final model state to {}'.format(final_model_state_file)) + torch.save(model.state_dict(), final_model_state_file) + + +if __name__ == '__main__': + main() diff --git a/tools/trainsimplepoint.py b/tools/trainsimplepoint.py new file mode 100644 index 0000000..d926062 --- /dev/null +++ b/tools/trainsimplepoint.py @@ -0,0 +1,232 @@ +import argparse +import csv +import functools +import itertools +import os +import pprint +import random +import shutil + +import torch +import torch.backends.cudnn as cudnn +from torch.utils.tensorboard import SummaryWriter +import torchvision.transforms as transforms + +import _init_paths +from config import cfg +from config import update_config +from core.fecalbolifunc import train, validate +from core.assocembedloss import weighted_bcelogit_loss +from 
dataset.simplepointdata import SimplePointDataset, parse_point_labels +from utils.utils import get_optimizer +from utils.utils import save_checkpoint +from utils.utils import create_logger + +import models + + +def parse_args(): + parser = argparse.ArgumentParser(description='train simple point detection network') + + parser.add_argument('--cvat-files', + help='list of CVAT XML files to use', + nargs='+', + required=True, + type=str) + parser.add_argument('--image-dir', + help='directory containing images', + required=True, + type=str) + parser.add_argument('--cfg', + help='experiment configure file name', + required=True, + type=str) + + parser.add_argument('opts', + help="Modify config options using the command-line", + default=None, + nargs=argparse.REMAINDER) + + parser.add_argument('--modelDir', + help='model directory', + type=str, + default='') + parser.add_argument('--logDir', + help='log directory', + type=str, + default='') + parser.add_argument('--dataDir', + help='data directory', + type=str, + default='') + + args = parser.parse_args() + + return args + +# Examples: +# python tools/trainsimplepoint.py \ +# --cfg experiments/corner/corner_2020-06-30_01.yaml \ +# --cvat-files data/corner/*.xml \ +# --image-dir data/corner/corner-images +def main(): + args = parse_args() + update_config(cfg, args) + + logger, final_output_dir, _ = create_logger( + cfg, args.cfg, 'train') + + logger.info(pprint.pformat(args)) + logger.info(cfg) + + swriter = SummaryWriter(os.path.join(final_output_dir, 'tb')) + + # cudnn related setting + cudnn.benchmark = cfg.CUDNN.BENCHMARK + torch.backends.cudnn.deterministic = cfg.CUDNN.DETERMINISTIC + torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED + + model = eval('models.'+cfg.MODEL.NAME+'.get_pose_net')(cfg, is_train=True).cuda() + + # copy model file + this_dir = os.path.dirname(__file__) + shutil.copy2( + os.path.join(this_dir, '../lib/models', cfg.MODEL.NAME + '.py'), + final_output_dir) + + if cfg.LOSS.POSE_LOSS_FUNC == 'MSE': + criterion = torch.nn.MSELoss() + elif cfg.LOSS.POSE_LOSS_FUNC == 'WEIGHTED_BCE': + criterion = functools.partial( + weighted_bcelogit_loss, + pos_weight = cfg.LOSS.POSITIVE_LABEL_WEIGHT) + else: + raise Exception('Unknown pose loss function: {}'.format(cfg.LOSS.POSE_LOSS_FUNC)) + + # Data loading code + simple_point_labels = list(itertools.chain.from_iterable( + parse_point_labels(f, 'corner') for f in args.cvat_files)) + simple_point_labels = [ + l for l in simple_point_labels + if os.path.exists(os.path.join(args.image_dir, l['image_name'])) + ] + + validation_set_filename = cfg.DATASET.TEST_SET + val_img_names = set() + if os.path.exists(validation_set_filename): + with open(validation_set_filename) as val_file: + for curr_line in val_file: + img_name = curr_line.strip() + val_img_names.add(img_name) + + else: + img_names = {lbl['image_name'] for lbl in simple_point_labels} + val_count = round(len(img_names) * cfg.DATASET.TEST_SET_PROPORTION) + val_img_names = set(random.sample(img_names, val_count)) + + logger.info("=> saving validation image names to '{}'".format(validation_set_filename)) + with open(validation_set_filename, 'w') as val_file: + for img_name in val_img_names: + val_file.write(img_name) + val_file.write('\n') + + transform = transforms.Normalize(mean=[0.485], std=[0.229]) + + train_labels = [lbl for lbl in simple_point_labels if lbl['image_name'] not in val_img_names] + train_ds = SimplePointDataset( + cfg, + args.image_dir, + train_labels, + True, + transform, + ) + train_loader = 
+
+    # Data loading code
+    simple_point_labels = list(itertools.chain.from_iterable(
+        parse_point_labels(f, 'corner') for f in args.cvat_files))
+    simple_point_labels = [
+        l for l in simple_point_labels
+        if os.path.exists(os.path.join(args.image_dir, l['image_name']))
+    ]
+
+    validation_set_filename = cfg.DATASET.TEST_SET
+    val_img_names = set()
+    if os.path.exists(validation_set_filename):
+        with open(validation_set_filename) as val_file:
+            for curr_line in val_file:
+                img_name = curr_line.strip()
+                val_img_names.add(img_name)
+
+    else:
+        img_names = {lbl['image_name'] for lbl in simple_point_labels}
+        val_count = round(len(img_names) * cfg.DATASET.TEST_SET_PROPORTION)
+        val_img_names = set(random.sample(img_names, val_count))
+
+        logger.info("=> saving validation image names to '{}'".format(validation_set_filename))
+        with open(validation_set_filename, 'w') as val_file:
+            for img_name in val_img_names:
+                val_file.write(img_name)
+                val_file.write('\n')
+
+    transform = transforms.Normalize(mean=[0.485], std=[0.229])
+
+    train_labels = [lbl for lbl in simple_point_labels if lbl['image_name'] not in val_img_names]
+    train_ds = SimplePointDataset(
+        cfg,
+        args.image_dir,
+        train_labels,
+        True,
+        transform,
+    )
+    train_loader = torch.utils.data.DataLoader(
+        train_ds,
+        batch_size=cfg.TRAIN.BATCH_SIZE_PER_GPU,
+        shuffle=cfg.TRAIN.SHUFFLE,
+        num_workers=cfg.WORKERS,
+        pin_memory=cfg.PIN_MEMORY,
+        drop_last=True,
+    )
+
+    val_labels = [lbl for lbl in simple_point_labels if lbl['image_name'] in val_img_names]
+    val_ds = SimplePointDataset(
+        cfg,
+        args.image_dir,
+        val_labels,
+        False,
+        transform,
+    )
+    valid_loader = torch.utils.data.DataLoader(
+        val_ds,
+        batch_size=cfg.TEST.BATCH_SIZE_PER_GPU,
+        shuffle=False,
+        num_workers=cfg.WORKERS,
+        pin_memory=cfg.PIN_MEMORY,
+    )
+
+    logger.info("=> full data set size: {}; training/validation: {} [{}]/{} [{}]".format(
+        len(simple_point_labels), len(train_labels), len(train_ds), len(val_labels), len(val_ds)))
+
+    best_perf = None
+    last_epoch = -1
+    optimizer = get_optimizer(cfg, model)
+    begin_epoch = cfg.TRAIN.BEGIN_EPOCH
+    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
+        optimizer, cfg.TRAIN.LR_STEP, cfg.TRAIN.LR_FACTOR,
+        last_epoch=last_epoch
+    )
+
+    train_table_fname = os.path.join(final_output_dir, 'training.tsv')
+    val_table_fname = os.path.join(final_output_dir, 'validation.tsv')
+    with open(train_table_fname, 'w', newline='') as train_table_f, \
+            open(val_table_fname, 'w', newline='') as val_table_f:
+
+        train_header = ['Epoch', 'Batch', 'Loss', 'Batch Time', 'Batch Size']
+        train_table_writer = csv.DictWriter(train_table_f, fieldnames=train_header, delimiter='\t')
+        train_table_writer.writeheader()
+
+        val_header = ['Epoch', 'Loss', 'Performance Indicator']
+        val_table_writer = csv.DictWriter(val_table_f, fieldnames=val_header, delimiter='\t')
+        val_table_writer.writeheader()
+
+        logger.info('entering epoch loop from: {} to {}'.format(begin_epoch, cfg.TRAIN.END_EPOCH))
+        for epoch in range(begin_epoch, cfg.TRAIN.END_EPOCH):
+
+            # train for one epoch
+            train(
+                cfg,
+                train_loader,
+                model,
+                criterion,
+                optimizer,
+                train_table_writer,
+                swriter,
+                epoch)
+
+            # evaluate on validation set
+            perf_indicator = validate(
+                cfg,
+                valid_loader,
+                model,
+                criterion,
+                val_table_writer,
+                swriter,
+                epoch)
+
+            if best_perf is None or perf_indicator >= best_perf:
+                best_perf = perf_indicator
+                logger.info('*** NEW BEST *** {}'.format(perf_indicator))
+                best_model_state_file = os.path.join(final_output_dir, 'best_state.pth')
+                logger.info('=> saving best model state to {}'.format(best_model_state_file))
+                torch.save(model.state_dict(), best_model_state_file)
+
+            lr_scheduler.step()
+
+    final_model_state_file = os.path.join(final_output_dir, 'final_state.pth')
+    logger.info('=> saving final model state to {}'.format(final_model_state_file))
+    torch.save(model.state_dict(), final_model_state_file)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/vm/corner-detection-2021-08-25.def b/vm/corner-detection-2021-08-25.def
new file mode 100644
index 0000000..0d7e91f
--- /dev/null
+++ b/vm/corner-detection-2021-08-25.def
@@ -0,0 +1,36 @@
+# build like:
+# singularity build --fakeroot corner-detection-2021-08-25.sif corner-detection-2021-08-25.def
+
+Bootstrap: docker
+From: nvcr.io/nvidia/cuda:10.2-base-ubuntu18.04
+
+%files
+    /home/sheppk/projects/corner-detection-env /
+    ../requirements.txt /
+
+%runscript
+
+    # /corner-detection-env/corner-conf.yaml is experiments/corner/corner_2020-06-30_01.yaml
+    # and /corner-detection-env/cornerdetection.pth is
+    # output-corner/simplepoint/pose_hrnet/corner_2020-06-30_01/best_state.pth
+    python3 -u /corner-detection-env/deep-hres-net/tools/infercorners.py \
+        --cfg /corner-detection-env/corner-conf.yaml \
+        --model-file /corner-detection-env/cornerdetection.pth \
+        --videos "${1}"
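+
+    # Example invocation once the .sif is built (this mirrors how
+    # infer-corners-batch.sh calls it; the video path is illustrative):
+    #   singularity run --nv corner-detection-2021-08-25.sif MyVideo.avi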
+
+%post
+    apt-get -y update
+    apt-get -y install less
+    apt-get -y install vim
+    apt-get -y install ffmpeg
+    apt-get -y install python3-pip
+    apt-get -y install libsm6
+    apt-get -y install libxext6
+    apt-get -y install libxrender-dev
+    apt-get -y clean
+
+    pip3 install --upgrade pip
+
+    # Cluster needs CUDA 9.2 version so we install separately
+    pip3 install torch==1.3.0+cu92 torchvision==0.4.1+cu92 -f https://download.pytorch.org/whl/torch_stable.html
+    pip3 install -r /requirements.txt
diff --git a/vm/deep-hres-net-2019-06-28.def b/vm/deep-hres-net-2019-06-28.def
new file mode 100644
index 0000000..580aaa3
--- /dev/null
+++ b/vm/deep-hres-net-2019-06-28.def
@@ -0,0 +1,29 @@
+Bootstrap: localimage
+From: cuda-2019-05-13.simg
+
+%files
+    /home/sheppk/projects/pose-est-env /
+
+%runscript
+
+    python3 -u /pose-est-env/deep-hres-net/tools/infermousepose.py \
+        --model-file "/pose-est-env/pose-est-model.pth" \
+        "/pose-est-env/pose-est-conf.yaml" \
+        "${1}" "${2}"
+
+%post
+    apt-get -y update
+    apt-get -y install less
+    apt-get -y install vim
+    apt-get -y install ffmpeg
+    apt-get -y install python3-pip
+    apt-get -y install libsm6
+    apt-get -y install libxext6
+    apt-get -y install libxrender-dev
+    apt-get -y clean
+
+    pip3 install https://download.pytorch.org/whl/cu100/torch-1.1.0-cp36-cp36m-linux_x86_64.whl
+    pip3 install 'torchvision==0.2.2' # getting "ImportError: libcudart.so.9.0" with 0.3
+    pip3 install -r /pose-est-env/deep-hres-net/requirements.txt
+
+    chmod -R a+rw /pose-est-env
diff --git a/vm/extract-frames.sh b/vm/extract-frames.sh
new file mode 100755
index 0000000..338c45a
--- /dev/null
+++ b/vm/extract-frames.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+#
+#SBATCH --job-name=extract-frames
+#
+#SBATCH --qos=batch
+#SBATCH --time=6:00:00
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=4
+#SBATCH --mem=8G
+
+trim_sp() {
+    local var="$*"
+    # remove leading whitespace characters
+    var="${var#"${var%%[![:space:]]*}"}"
+    # remove trailing whitespace characters
+    var="${var%"${var##*[![:space:]]}"}"
+    echo -n "$var"
+}
+
+export PATH="/opt/singularity/bin:${PATH}"
+if [[ -n "${SLURM_JOB_ID}" ]]
+then
+    # the script is being run by slurm
+    if [[ -n "${SLURM_ARRAY_TASK_ID}" ]]
+    then
+        if [[ ( -n "${BATCH_FILE}" ) && ( -n "${OUT_DIR}" ) ]]
+        then
+
+            echo "DUMP OF CURRENT ENVIRONMENT:"
+            env
+            echo "BEGIN PROCESSING: ${BATCH_FILE} => ${OUT_DIR} for row ${SLURM_ARRAY_TASK_ID}"
+
+            module load singularity
+            singularity exec "${ROOT_DIR}/multi-mouse-pose-2020-02-12.sif" python3 "${ROOT_DIR}/extractframes.py" \
+                --frame-table "${BATCH_FILE}" \
+                --frame-table-row "${SLURM_ARRAY_TASK_ID}" \
+                --root-dir "$(dirname "${BATCH_FILE}")" \
+                --outdir "${OUT_DIR}"
+
+            echo "FINISHED PROCESSING"
+
+        else
+            echo "ERROR: the BATCH_FILE or OUT_DIR environment variable is not defined" >&2
+        fi
+    else
+        echo "ERROR: no SLURM_ARRAY_TASK_ID found" >&2
+    fi
+else
+    # the script is being run from command line. We should do a self-submit as an array job
+    if [[ ( -f "${1}" ) && ( -n "${2}" ) ]]
+    then
+        # echo "${1} is set and not empty"
+        echo "Preparing to submit batch file: ${1}"
+        test_count=$(wc -l < "${1}")
+        echo "Submitting an array job for ${test_count} videos"
+
+        mkdir -p "${2}"
+
+        # Here we perform a self-submit
+        sbatch --export=ROOT_DIR="$(dirname "${0}")",BATCH_FILE="${1}",OUT_DIR="${2}" --array="1-${test_count}" "${0}"
+    else
+        echo "ERROR: you need to provide a batch file to process and output dir. Eg: extract-frames.sh batchfile.txt out" >&2
+        exit 1
+    fi
+fi
diff --git a/vm/infer-corners-batch.sh b/vm/infer-corners-batch.sh
new file mode 100755
index 0000000..0e0e797
--- /dev/null
+++ b/vm/infer-corners-batch.sh
@@ -0,0 +1,93 @@
+#!/bin/bash
+#
+#SBATCH --job-name=infer-corners
+#
+#SBATCH --time=6:00:00
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=10
+#SBATCH --gres=gpu:1
+#SBATCH --qos=inference
+#SBATCH --mem=16G
+#SBATCH --nice
+
+trim_sp() {
+    local var="$*"
+    # remove leading whitespace characters
+    var="${var#"${var%%[![:space:]]*}"}"
+    # remove trailing whitespace characters
+    var="${var%"${var##*[![:space:]]}"}"
+    echo -n "$var"
+}
+
+export PATH="/opt/singularity/bin:${PATH}"
+if [[ -n "${SLURM_JOB_ID}" ]]
+then
+    # the script is being run by slurm
+    if [[ -n "${SLURM_ARRAY_TASK_ID}" ]]
+    then
+        if [[ -n "${BATCH_FILE}" ]]
+        then
+            # here we use the array ID to pull out the right video
+            VIDEO_FILE=$(trim_sp $(sed -n "${SLURM_ARRAY_TASK_ID}{p;q;}" < "${BATCH_FILE}"))
+            cd "$(dirname "${BATCH_FILE}")"
+            if [[ -f "${VIDEO_FILE}" ]]
+            then
+                echo "${VIDEO_FILE}"
+                echo "DUMP OF CURRENT ENVIRONMENT:"
+                env
+                echo "BEGIN PROCESSING: ${VIDEO_FILE}"
+                CORNERS_FILE="${VIDEO_FILE%.*}_corners_v2.yaml"
+                module load singularity
+                singularity run --nv "${ROOT_DIR}/corner-detection-2021-08-25.sif" "${VIDEO_FILE}"
+
+                # Retry several times if we have to. Unfortunately this is needed because
+                # ffmpeg will sporadically give the following error on winter:
+                # ffmpeg: symbol lookup error: /.singularity.d/libs/libGL.so.1: undefined symbol: _glapi_tls_Current
+                #
+                # You can test this by simply running:
+                # singularity exec --nv corner-detection-2021-08-25.sif ffmpeg
+                #
+                # which will fail about 1 out of 10 times or so. I (Keith) haven't been able to
+                # figure out a solution for this except for retrying several times.
+                MAX_RETRIES=10
+                for (( i=0; i<"${MAX_RETRIES}"; i++ ))
+                do
+                    if [[ ! -f "${CORNERS_FILE}" ]]
+                    then
+                        echo "WARNING: FAILED TO GENERATE OUTPUT FILE. RETRY ATTEMPT ${i}"
+                        singularity run --nv "${ROOT_DIR}/corner-detection-2021-08-25.sif" "${VIDEO_FILE}"
+                    fi
+                done
+
+                if [[ ! -f "${CORNERS_FILE}" ]]
+                then
+                    echo "ERROR: FAILED TO GENERATE OUTPUT FILE WITH NO MORE RETRIES"
+                fi
+
+                echo "FINISHED PROCESSING: ${VIDEO_FILE}"
+            else
+                echo "ERROR: could not find video file: ${VIDEO_FILE}" >&2
+            fi
+        else
+            echo "ERROR: the BATCH_FILE environment variable is not defined" >&2
+        fi
+    else
+        echo "ERROR: no SLURM_ARRAY_TASK_ID found" >&2
+    fi
+else
+    # the script is being run from command line. We should do a self-submit as an array job
+    if [[ -f "${1}" ]]
+    then
+        # echo "${1} is set and not empty"
+        echo "Preparing to submit batch file: ${1}"
+        test_count=$(wc -l < "${1}")
+        echo "Submitting an array job for ${test_count} videos"
+
+        # Here we perform a self-submit
+        echo sbatch --export=ROOT_DIR="$(dirname "${0}")",BATCH_FILE="${1}" --array="1-${test_count}%24" "${0}"
+        sbatch --export=ROOT_DIR="$(dirname "${0}")",BATCH_FILE="${1}" --array="1-${test_count}%24" "${0}"
+    else
+        echo "ERROR: you need to provide a batch file to process. Eg: ./infer-corners-batch.sh batchfile.txt" >&2
+        exit 1
+    fi
+fi
diff --git a/vm/infer-multi-poseest-batch.sh b/vm/infer-multi-poseest-batch.sh
new file mode 100755
index 0000000..4256dda
--- /dev/null
+++ b/vm/infer-multi-poseest-batch.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+#
+#SBATCH --job-name=infer-multi-poseest-arr
+#
+#SBATCH --time=6:00:00
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=10
+#SBATCH --gres=gpu:1
+#SBATCH --qos=inference
+#SBATCH --mem=16G
+#SBATCH --nice
+
+trim_sp() {
+    local var="$*"
+    # remove leading whitespace characters
+    var="${var#"${var%%[![:space:]]*}"}"
+    # remove trailing whitespace characters
+    var="${var%"${var##*[![:space:]]}"}"
+    echo -n "$var"
+}
+
+export PATH="/opt/singularity/bin:${PATH}"
+if [[ -n "${SLURM_JOB_ID}" ]]
+then
+    # the script is being run by slurm
+    if [[ -n "${SLURM_ARRAY_TASK_ID}" ]]
+    then
+        if [[ -n "${BATCH_FILE}" ]]
+        then
+            # here we use the array ID to pull out the right video
+            VIDEO_FILE=$(trim_sp $(sed -n "${SLURM_ARRAY_TASK_ID}{p;q;}" < "${BATCH_FILE}"))
+            cd "$(dirname "${BATCH_FILE}")"
+            if [[ -f "${VIDEO_FILE}" ]]
+            then
+                echo "${VIDEO_FILE}"
+                echo "DUMP OF CURRENT ENVIRONMENT:"
+                env
+                echo "BEGIN PROCESSING: ${VIDEO_FILE}"
+                H5_OUT_FILE="${VIDEO_FILE%.*}_pose_est_v3.h5"
+                module load singularity
+                singularity run --nv "${ROOT_DIR}/multi-mouse-pose-2020-02-12.sif" "${VIDEO_FILE}" "${H5_OUT_FILE}"
+
+                # Retry several times if we have to. Unfortunately this is needed because
+                # ffmpeg will sporadically give the following error on winter:
+                # ffmpeg: symbol lookup error: /.singularity.d/libs/libGL.so.1: undefined symbol: _glapi_tls_Current
+                #
+                # You can test this by simply running:
+                # singularity exec --nv multi-mouse-pose-2020-02-12.sif ffmpeg
+                #
+                # which will fail about 1 out of 10 times or so. I (Keith) haven't been able to
+                # figure out a solution for this except for retrying several times.
+                MAX_RETRIES=10
+                for (( i=0; i<"${MAX_RETRIES}"; i++ ))
+                do
+                    if [[ ! -f "${H5_OUT_FILE}" ]]
+                    then
+                        echo "WARNING: FAILED TO GENERATE OUTPUT FILE. RETRY ATTEMPT ${i}"
+                        singularity run --nv "${ROOT_DIR}/multi-mouse-pose-2020-02-12.sif" "${VIDEO_FILE}" "${H5_OUT_FILE}"
+                    fi
+                done
+
+                if [[ ! -f "${H5_OUT_FILE}" ]]
+                then
+                    echo "ERROR: FAILED TO GENERATE OUTPUT FILE WITH NO MORE RETRIES"
+                fi
+
+                echo "FINISHED PROCESSING: ${VIDEO_FILE}"
+            else
+                echo "ERROR: could not find video file: ${VIDEO_FILE}" >&2
+            fi
+        else
+            echo "ERROR: the BATCH_FILE environment variable is not defined" >&2
+        fi
+    else
+        echo "ERROR: no SLURM_ARRAY_TASK_ID found" >&2
+    fi
+else
+    # the script is being run from command line. We should do a self-submit as an array job
+    if [[ -f "${1}" ]]
+    then
+        # echo "${1} is set and not empty"
+        echo "Preparing to submit batch file: ${1}"
+        test_count=$(wc -l < "${1}")
+        echo "Submitting an array job for ${test_count} videos"
+
+        # Here we perform a self-submit
+        sbatch --export=ROOT_DIR="$(dirname "${0}")",BATCH_FILE="${1}" --array="1-${test_count}%24" "${0}"
+    else
+        echo "ERROR: you need to provide a batch file to process. Eg: ./infer-multi-poseest-batch.sh batchfile.txt" >&2
+        exit 1
+    fi
+fi
diff --git a/vm/infer-obj-seg.sh b/vm/infer-obj-seg.sh
new file mode 100755
index 0000000..de4dcec
--- /dev/null
+++ b/vm/infer-obj-seg.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+#
+#SBATCH --job-name=infer-obj-seg-arr
+#
+#SBATCH --time=24:00:00
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=4
+#SBATCH --gres gpu:1
+#SBATCH --mem=16G
+#SBATCH --nice
+
+trim_sp() {
+    local var="$*"
+    # remove leading whitespace characters
+    var="${var#"${var%%[![:space:]]*}"}"
+    # remove trailing whitespace characters
+    var="${var%"${var##*[![:space:]]}"}"
+    echo -n "$var"
+}
+
+export PATH="/opt/singularity/bin:${PATH}"
+if [[ -n "${SLURM_JOB_ID}" ]]
+then
+    # the script is being run by slurm
+    if [[ -n "${SLURM_ARRAY_TASK_ID}" ]]
+    then
+        if [[ -n "${BATCH_FILE}" ]]
+        then
+            # here we use the array ID to pull out the right video
+            VIDEO_FILE=$(trim_sp $(sed -n "${SLURM_ARRAY_TASK_ID}{p;q;}" < "${BATCH_FILE}"))
+            cd "$(dirname "${BATCH_FILE}")"
+            if [[ -f "${VIDEO_FILE}" ]]
+            then
+                echo "DUMP OF CURRENT ENVIRONMENT:"
+                env
+                echo "BEGIN PROCESSING: ${VIDEO_FILE}"
+                H5_OUT_FILE="${VIDEO_FILE%.*}_obj_seg.h5"
+                module load singularity
+                singularity run --nv "${ROOT_DIR}/obj-seg-2019-07-17.simg" "${VIDEO_FILE}" "${H5_OUT_FILE}"
+                echo "FINISHED PROCESSING: ${VIDEO_FILE}"
+            else
+                echo "ERROR: could not find video file: ${VIDEO_FILE}" >&2
+            fi
+        else
+            echo "ERROR: the BATCH_FILE environment variable is not defined" >&2
+        fi
+    else
+        echo "ERROR: no SLURM_ARRAY_TASK_ID found" >&2
+    fi
+else
+    # the script is being run from command line. We should do a self-submit as an array job
+    if [[ -f "${1}" ]]
+    then
+        # echo "${1} is set and not empty"
+        echo "Preparing to submit batch file: ${1}"
+        test_count=$(wc -l < "${1}")
+        echo "Submitting an array job for ${test_count} videos"
+
+        # Here we perform a self-submit
+        sbatch --export=ROOT_DIR="$(dirname "${0}")",BATCH_FILE="${1}" --array="1-${test_count}" "${0}"
+    else
+        echo "ERROR: you need to provide a batch file to process. Eg: ./infer-obj-seg.sh batchfile.txt" >&2
+        exit 1
+    fi
+fi
diff --git a/vm/infer-poseest-batch.sh b/vm/infer-poseest-batch.sh
new file mode 100755
index 0000000..09b4f4d
--- /dev/null
+++ b/vm/infer-poseest-batch.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+#
+#SBATCH --job-name=infer-poseest-arr
+#
+#SBATCH --time=6:00:00
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=10
+#SBATCH --gres=gpu:1
+#SBATCH --qos=inference
+#SBATCH --mem=16G
+#SBATCH --nice
+
+trim_sp() {
+    local var="$*"
+    # remove leading whitespace characters
+    var="${var#"${var%%[![:space:]]*}"}"
+    # remove trailing whitespace characters
+    var="${var%"${var##*[![:space:]]}"}"
+    echo -n "$var"
+}
+
+export PATH="/opt/singularity/bin:${PATH}"
+if [[ -n "${SLURM_JOB_ID}" ]]
+then
+    # the script is being run by slurm
+    if [[ -n "${SLURM_ARRAY_TASK_ID}" ]]
+    then
+        if [[ -n "${BATCH_FILE}" ]]
+        then
+            # here we use the array ID to pull out the right video
+            VIDEO_FILE=$(trim_sp $(sed -n "${SLURM_ARRAY_TASK_ID}{p;q;}" < "${BATCH_FILE}"))
+            cd "$(dirname "${BATCH_FILE}")"
+            if [[ -f "${VIDEO_FILE}" ]]
+            then
+                echo "${VIDEO_FILE}"
+                echo "DUMP OF CURRENT ENVIRONMENT:"
+                env
+                echo "BEGIN PROCESSING: ${VIDEO_FILE}"
+                H5_OUT_FILE="${VIDEO_FILE%.*}_pose_est_v2.h5"
+                module load singularity
+                singularity run --nv "${ROOT_DIR}/deep-hres-net-2019-06-28.simg" "${VIDEO_FILE}" "${H5_OUT_FILE}"
+
+                # Retry several times if we have to. Unfortunately this is needed because
+                # ffmpeg will sporadically give the following error on winter:
+                # ffmpeg: symbol lookup error: /.singularity.d/libs/libGL.so.1: undefined symbol: _glapi_tls_Current
+                #
+                # You can test this by simply running:
+                # singularity exec --nv deep-hres-net-2019-06-28.simg ffmpeg
+                #
+                # which will fail about 1 out of 10 times or so. I (Keith) haven't been able to
+                # figure out a solution for this except for retrying several times.
+                MAX_RETRIES=10
+                for (( i=0; i<"${MAX_RETRIES}"; i++ ))
+                do
+                    if [[ ! -f "${H5_OUT_FILE}" ]]
+                    then
+                        echo "WARNING: FAILED TO GENERATE OUTPUT FILE. RETRY ATTEMPT ${i}"
+                        singularity run --nv "${ROOT_DIR}/deep-hres-net-2019-06-28.simg" "${VIDEO_FILE}" "${H5_OUT_FILE}"
+                    fi
+                done
+
+                if [[ ! -f "${H5_OUT_FILE}" ]]
+                then
+                    echo "ERROR: FAILED TO GENERATE OUTPUT FILE WITH NO MORE RETRIES"
+                fi
+
+                echo "FINISHED PROCESSING: ${VIDEO_FILE}"
+            else
+                echo "ERROR: could not find video file: ${VIDEO_FILE}" >&2
+            fi
+        else
+            echo "ERROR: the BATCH_FILE environment variable is not defined" >&2
+        fi
+    else
+        echo "ERROR: no SLURM_ARRAY_TASK_ID found" >&2
+    fi
+else
+    # the script is being run from command line. We should do a self-submit as an array job
+    if [[ -f "${1}" ]]
+    then
+        # echo "${1} is set and not empty"
+        echo "Preparing to submit batch file: ${1}"
+        test_count=$(wc -l < "${1}")
+        echo "Submitting an array job for ${test_count} videos"
+
+        # Here we perform a self-submit
+        sbatch --export=ROOT_DIR="$(dirname "${0}")",BATCH_FILE="${1}" --array="1-${test_count}%24" "${0}"
+    else
+        echo "ERROR: you need to provide a batch file to process. Eg: ./infer-poseest-batch.sh batchfile.txt" >&2
+        exit 1
+    fi
+fi
diff --git a/vm/multi-mouse-pose-2019-11-04.def b/vm/multi-mouse-pose-2019-11-04.def
new file mode 100644
index 0000000..aed9b42
--- /dev/null
+++ b/vm/multi-mouse-pose-2019-11-04.def
@@ -0,0 +1,39 @@
+# build like:
+# singularity build --fakeroot multi-mouse-pose-2019-11-04.sif multi-mouse-pose-2019-11-04.def
+
+Bootstrap: docker
+From: nvcr.io/nvidia/cuda:10.0-base-ubuntu18.04
+
+#%files
+#    /home/sheppk/projects/pose-est-env /
+
+%files
+    ../requirements.txt /
+
+#%runscript
+#
+#    python3 -u /pose-est-env/deep-hres-net/tools/infermousepose.py \
+#        --model-file "/pose-est-env/pose-est-model.pth" \
+#        "/pose-est-env/pose-est-conf.yaml" \
+#        "${1}" "${2}"
+
+%post
+    apt-get -y update
+    apt-get -y install less
+    apt-get -y install vim
+    apt-get -y install ffmpeg
+    apt-get -y install python3-pip
+    apt-get -y install libsm6
+    apt-get -y install libxext6
+    apt-get -y install libxrender-dev
+    apt-get -y clean
+
+    #pip3 install https://download.pytorch.org/whl/cu100/torch-1.1.0-cp36-cp36m-linux_x86_64.whl
+    #pip3 install 'torchvision==0.2.2' # getting "ImportError: libcudart.so.9.0" with 0.3
+    #pip3 install -r /pose-est-env/deep-hres-net/requirements.txt
+
+    # Cluster needs CUDA 9.2 version so we install separately
+    pip3 install torch==1.3.0+cu92 torchvision==0.4.1+cu92 -f https://download.pytorch.org/whl/torch_stable.html
+    pip3 install -r /requirements.txt
+
+    #chmod -R a+rw /pose-est-env
diff --git a/vm/multi-mouse-pose-2020-02-12.def b/vm/multi-mouse-pose-2020-02-12.def
new file mode 100644
index 0000000..0fb5f8c
--- /dev/null
+++ b/vm/multi-mouse-pose-2020-02-12.def
@@ -0,0 +1,39 @@
+# build like:
+# singularity build --fakeroot multi-mouse-pose-2020-02-12.sif multi-mouse-pose-2020-02-12.def
+
+Bootstrap: docker
+From: nvcr.io/nvidia/cuda:10.2-base-ubuntu18.04
+
+%files
+    /home/sheppk/projects/pose-est-env /
+    ../requirements.txt /
+
+%runscript
+
+    # /pose-est-env/multimousepose-conf.yaml is experiments/multimouse/multimouse_2020-02-03_06.yaml
+    # and /pose-est-env/multimousepose.pth is
+    # output-multi-mouse/multimousepose/pose_hrnet/multimouse_2020-02-03_06/best_state.pth
+    python3 -u /pose-est-env/deep-hres-net/tools/infermultimousepose.py \
+        --max-embed-sep-within-instances 0.3 \
+        --min-embed-sep-between-instances 0.2 \
+        --min-pose-heatmap-val 1.0 \
+        --max-inst-dist-px 75 \
+        --pose-smoothing \
+        /pose-est-env/multimousepose.pth \
+        /pose-est-env/multimousepose-conf.yaml \
+        "${1}" "${2}"
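+
+    # Example invocation once the .sif is built (this is the same pattern
+    # infer-multi-poseest-batch.sh uses; file names are illustrative):
+    #   singularity run --nv multi-mouse-pose-2020-02-12.sif MyVideo.avi MyVideo_pose_est_v3.h5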
+
+%post
+    apt-get -y update
+    apt-get -y install less
+    apt-get -y install vim
+    apt-get -y install ffmpeg
+    apt-get -y install python3-pip
+    apt-get -y install libsm6
+    apt-get -y install libxext6
+    apt-get -y install libxrender-dev
+    apt-get -y clean
+
+    # Cluster needs CUDA 9.2 version so we install separately
+    pip3 install torch==1.3.0+cu92 torchvision==0.4.1+cu92 -f https://download.pytorch.org/whl/torch_stable.html
+    pip3 install -r /requirements.txt
diff --git a/vm/obj-seg-2019-07-15.def b/vm/obj-seg-2019-07-15.def
new file mode 100644
index 0000000..2e722c3
--- /dev/null
+++ b/vm/obj-seg-2019-07-15.def
@@ -0,0 +1,29 @@
+Bootstrap: localimage
+From: cuda-2019-05-13.simg
+
+%files
+    /home/sheppk/projects/pose-est-env /
+
+%runscript
+
+    python3 -u /pose-est-env/deep-hres-net/tools/inferobjects.py \
+        --model-file "/pose-est-env/obj-seg-model.pth" \
+        "/pose-est-env/obj-seg-conf.yaml" \
+        "${1}" "${2}"
+
+%post
+    apt-get -y update
+    apt-get -y install less
+    apt-get -y install vim
+    apt-get -y install ffmpeg
+    apt-get -y install python3-pip
+    apt-get -y install libsm6
+    apt-get -y install libxext6
+    apt-get -y install libxrender-dev
+    apt-get -y clean
+
+    pip3 install https://download.pytorch.org/whl/cu100/torch-1.1.0-cp36-cp36m-linux_x86_64.whl
+    pip3 install 'torchvision==0.2.2' # getting "ImportError: libcudart.so.9.0" with 0.3
+    pip3 install -r /pose-est-env/deep-hres-net/requirements.txt
+
+    chmod -R a+rw /pose-est-env
diff --git a/vm/obj-seg-2019-07-16.def b/vm/obj-seg-2019-07-16.def
new file mode 100644
index 0000000..b2d3fef
--- /dev/null
+++ b/vm/obj-seg-2019-07-16.def
@@ -0,0 +1,31 @@
+Bootstrap: localimage
+From: cuda-2019-05-13.simg
+
+%files
+    /home/sheppk/projects/pose-est-env /
+
+%runscript
+
+    python3 -u /pose-est-env/deep-hres-net/tools/inferobjects.py \
+        --iou-threshold 0.5 \
+        --maximum-merge-duration-secs 1.0 \
+        --model-file "/pose-est-env/obj-seg-model.pth" \
+        "/pose-est-env/obj-seg-conf.yaml" \
+        "${1}" "${2}"
+
+%post
+    apt-get -y update
+    apt-get -y install less
+    apt-get -y install vim
+    apt-get -y install ffmpeg
+    apt-get -y install python3-pip
+    apt-get -y install libsm6
+    apt-get -y install libxext6
+    apt-get -y install libxrender-dev
+    apt-get -y clean
+
+    pip3 install https://download.pytorch.org/whl/cu100/torch-1.1.0-cp36-cp36m-linux_x86_64.whl
+    pip3 install 'torchvision==0.2.2' # getting "ImportError: libcudart.so.9.0" with 0.3
+    pip3 install -r /pose-est-env/deep-hres-net/requirements.txt
+
+    chmod -R a+rw /pose-est-env
diff --git a/vm/obj-seg-2019-07-17.def b/vm/obj-seg-2019-07-17.def
new file mode 100644
index 0000000..1cdd034
--- /dev/null
+++ b/vm/obj-seg-2019-07-17.def
@@ -0,0 +1,31 @@
+Bootstrap: localimage
+From: cuda-2019-05-13.simg
+
+%files
+    /home/sheppk/projects/pose-est-env /
+
+%runscript
+
+    python3 -u /pose-est-env/deep-hres-net/tools/inferobjects.py \
+        --iou-threshold 0.5 \
+        --maximum-merge-duration-secs 0.0 \
+        --model-file "/pose-est-env/obj-seg-model.pth" \
+        "/pose-est-env/obj-seg-conf.yaml" \
+        "${1}" "${2}"
+
+%post
+    apt-get -y update
+    apt-get -y install less
+    apt-get -y install vim
+    apt-get -y install ffmpeg
+    apt-get -y install python3-pip
+    apt-get -y install libsm6
+    apt-get -y install libxext6
+    apt-get -y install libxrender-dev
+    apt-get -y clean
+
+    pip3 install https://download.pytorch.org/whl/cu100/torch-1.1.0-cp36-cp36m-linux_x86_64.whl
+    pip3 install 'torchvision==0.2.2' # getting "ImportError: libcudart.so.9.0" with 0.3
+    pip3 install -r /pose-est-env/deep-hres-net/requirements.txt
+
+    chmod -R a+rw /pose-est-env
diff --git a/vm/train-fboli-detection.sh b/vm/train-fboli-detection.sh
new file mode 100755
index 0000000..628fcbc
--- /dev/null
+++ b/vm/train-fboli-detection.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+#
+#SBATCH --job-name=train-fboli-detection
+#
+#SBATCH --time=5-00:00:00
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=8
+#SBATCH --gres=gpu:1
+#SBATCH --mem=16G
+#SBATCH --nice
+#SBATCH --qos=training
+
+# Example:
+# sbatch --export=NNETTRAIN_CFG="/abs/path/to/cfg.yaml" train-fboli-detection.sh
+
+
+export PATH="/opt/singularity/bin:${PATH}"
+
+echo "BEGIN TRAINING: ${NNETTRAIN_CFG}"
+module load singularity
+singularity exec --nv vm/multi-mouse-pose-2019-11-04.sif python3 tools/trainfecalboli.py \
+    --cfg "${NNETTRAIN_CFG}" \
+    --cvat-files data/fecal-boli/*.xml \
+    --image-dir data/fecal-boli/images
diff --git a/vm/train-multi-mouse-pose.sh b/vm/train-multi-mouse-pose.sh
new file mode 100755
index 0000000..15c4223
--- /dev/null
+++ b/vm/train-multi-mouse-pose.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+#
+#SBATCH --job-name=train-multi-mouse-pose
+#
+#SBATCH --time=5-00:00:00
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=1
+#SBATCH --gres=gpu:1
+#SBATCH --mem=16G
+#SBATCH --nice
+#SBATCH --partition=gpu
+
+# Example:
+# sbatch --export=MMTRAIN_CFG="/abs/path/to/cfg.yaml" train-multi-mouse-pose.sh
+
+
+export PATH="/opt/singularity/bin:${PATH}"
+
+echo "BEGIN MULTI-MOUSE POSE TRAINING: ${MMTRAIN_CFG}"
+module load singularity
+singularity exec --nv vm/multi-mouse-pose-2019-11-04.sif python3 tools/trainmultimouse.py \
+    --cfg "${MMTRAIN_CFG}" \
+    --cvat-files data/multi-mouse/Annotations/*.xml data/multi-mouse/Annotations_NoMarkings/*.xml \
+    --image-dir data/multi-mouse/Dataset