Commit
initial commit
Keith Sheppard committed Oct 15, 2021
0 parents commit 1301488
Showing 233 changed files with 41,169 additions and 0 deletions.
112 changes: 112 additions & 0 deletions .gitignore
@@ -0,0 +1,112 @@
# IntelliJ project files
.idea
*.iml
out
gen

### Vim template
[._]*.s[a-w][a-z]
[._]s[a-w][a-z]
*.un~
Session.vim
.netrwhist
*~

### IPythonNotebook template
# Temporary data
.ipynb_checkpoints/

### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
#lib/
#lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
.dirconf
nosetests.xml
coverage.xml
*,cover

# Translations
*.mo
*.pot

# Django stuff:
*.log

# Sphinx documentation
docs/_build/

# PyBuilder
target/

*.ipynb
*.params
*.json
.vscode/

lib/pycocotools/_mask.c
lib/nms/cpu_nms.c

output/*
output-*/*
models/*
log/*
log-*/*
data/*
external/
temp*/
testfbdata/
fecal-boli-image-batch4/
model-archive/

draws/
plot/

*.avi
*.simg
*.sif
*.h5
runs/

image-out*/
sampled_frames*/
sandbox/
hard_frames/
21 changes: 21 additions & 0 deletions LICENSE
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2019 Leo Xiao, 2021 KumarLabJax

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
125 changes: 125 additions & 0 deletions README.md
@@ -0,0 +1,125 @@
# Mouse Pose HR Net

This repository is a forked and significantly modified version of the [official HRNet repository](https://github.com/leoxiaobin/deep-high-resolution-net.pytorch), adapted to support mouse pose inference. It contains two main approaches for inferring mouse pose. First, we have a single-mouse pose inference script: `tools/infermousepose.py`. Its pose output is currently used by our [gait analysis system](https://github.com/KumarLabJax/gaitanalysis) to extract the gait metrics that we analyze. We have also implemented multi-mouse pose estimation; the entry point for multi-mouse pose is the `tools/infermultimousepose.py` script. We describe these tools and others in more detail below.

## Tools

* `tools/addpixelunits.py`: adds pixel-to-centimeter conversion metadata to a pose file
* `tools/infercorners.py`: performs arena corner detection, which we use to convert pixel space to real-world physical space
* `tools/inferfecalbolicount.py`: locates fecal boli in the open field and provides minute-by-minute counts in the output
* `tools/infermousepose.py`: infers single-mouse pose for every frame in a video
* `tools/infermultimousepose.py`: infers multi-mouse pose for every frame in a video
* `tools/mousetrain.py`: trains the neural network for single-mouse pose
* `tools/testcornermodel.py`: tests the corner detection model and prints accuracy statistics
* `tools/testfecalboli.py`: tests the fecal boli detection model and prints accuracy statistics
* `tools/testmouseposemodel.py`: tests the single-mouse pose model and prints accuracy statistics
* `tools/testmultimouseinference.py`: tests the multi-mouse model and prints accuracy statistics

All of the tools above provide command-line help when run as `python3 tools/scriptname.py --help`. Additionally, most include comments in the script source code showing example invocations.

## Installation

Before starting, make sure you have `python3` installed. This code has been developed and tested on Python 3.8.10. The recommended approach to installing dependencies is to use a virtual environment:

```bash
python3 -m venv mousepose-venv
source mousepose-venv/bin/activate

# now switch to the pose repo dir and install requirements
cd $MOUSEPOSE_REPO_DIR
pip3 install -r requirements.txt
```

Note that we will also be providing prebuilt Singularity images to simplify this process.

## Pose File Formats

The following describes our pose file HDF5 formats. `tools/infermousepose.py` will generate the v2 format for single mouse and `tools/infermultimousepose.py` will generate the v3 format.

### Single-Mouse Pose Estimation v2 Format

Each video has a corresponding HDF5 file that contains pose estimation coordinates and confidences. These files have the same name as the corresponding video, except that ".avi" is replaced with "_pose_est_v2.h5".

Each HDF5 file contains two datasets:

* "poseest/points":
this is a dataset with size (#frames x #keypoints x 2), where #keypoints is 12 following the indexing scheme shown below, and the last dimension of size 2 holds the pixel (x, y) position for the respective frame and keypoint
the datatype is a 16-bit unsigned integer
* "poseest/confidence":
this dataset has size (#frames x #keypoints) and assigns a 0-1 confidence value to each of the 12 points (sometimes the confidence goes slightly higher than 1). We tend to threshold at 0.3 as "very low confidence"; when the mouse is not in the arena, almost all confidence values should be < 0.3
the datatype is a 32-bit floating point

The "poseest" group can have the following attributes attached:

* "cm_per_pixel" (optional):
defines how many centimeters a pixel of the open field represents
the datatype is a 32-bit floating point scalar
* "cm_per_pixel_source" (optional):
defines how the "cm_per_pixel" value was set. The value will be one of "corner_detection", "manually_set" or "default_alignment"
the datatype is a string scalar

The 12 point indexes have the following mapping to mouse body parts:

* NOSE_INDEX = 0
* LEFT_EAR_INDEX = 1
* RIGHT_EAR_INDEX = 2
* BASE_NECK_INDEX = 3
* LEFT_FRONT_PAW_INDEX = 4
* RIGHT_FRONT_PAW_INDEX = 5
* CENTER_SPINE_INDEX = 6
* LEFT_REAR_PAW_INDEX = 7
* RIGHT_REAR_PAW_INDEX = 8
* BASE_TAIL_INDEX = 9
* MID_TAIL_INDEX = 10
* TIP_TAIL_INDEX = 11
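Putting the format above together, a v2 pose file can be read with `h5py` along these lines. This is a minimal sketch: it first writes a tiny synthetic file (the file name, frame count, and values here are illustrative, not produced by the real tools) so that it runs on its own, then reads it back and applies the 0.3 confidence threshold suggested above.

```python
import numpy as np
import h5py

NOSE_INDEX, BASE_TAIL_INDEX = 0, 9  # from the keypoint mapping above

# Write a tiny synthetic v2 pose file so this sketch is self-contained
# (a real file comes from tools/infermousepose.py).
n_frames, n_keypoints = 5, 12
with h5py.File("example_pose_est_v2.h5", "w") as f:
    f.create_dataset(
        "poseest/points",
        data=np.zeros((n_frames, n_keypoints, 2), dtype=np.uint16))
    f.create_dataset(
        "poseest/confidence",
        data=np.full((n_frames, n_keypoints), 0.9, dtype=np.float32))
    f["poseest"].attrs["cm_per_pixel"] = np.float32(0.08)

# Read it back the way a downstream consumer would
with h5py.File("example_pose_est_v2.h5", "r") as f:
    points = f["poseest/points"][:]          # (#frames, 12, 2), pixel (x, y)
    confidence = f["poseest/confidence"][:]  # (#frames, 12), 0-1 confidence
    cm_per_pixel = f["poseest"].attrs.get("cm_per_pixel")  # optional attribute

# Mask out very-low-confidence points using the 0.3 threshold
valid = confidence > 0.3
print(points.shape, bool(valid.all()))
```

If the optional "cm_per_pixel" attribute is present, multiplying pixel coordinates by it converts them to centimeters.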

### Multi-Mouse Pose Estimation v3 Format

Each video has a corresponding HDF5 file that contains pose estimation coordinates and confidences. These files have the same name as the corresponding video, except that ".avi" is replaced with "_pose_est_v3.h5".

Several of the datasets below have a dimension of length "maximum # instances". This is because the instance count can vary over time within a video, either because mice are added or removed, or because of an error in inference. Since each frame has its own instance count, you must consult the "poseest/instance_count" dataset to determine the number of valid instances per frame.

Each HDF5 file contains the following datasets:

* "poseest/points":
this is a dataset with size (#frames x maximum # instances x #keypoints x 2), where #keypoints is 12 following the indexing scheme shown below, and the last dimension of size 2 holds the pixel (y, x) position for the respective frame and keypoint
the datatype is a 16-bit unsigned integer
* "poseest/confidence":
this dataset has size (#frames x maximum # instances x #keypoints) and assigns a confidence value to each of the 12 points. Values of 0 indicate a missing point; anything higher than 0 indicates a valid point, so in that sense this dataset can be treated as binary
the datatype is a 32-bit floating point
* "poseest/instance_count":
this dataset has size (#frames) and gives the instance count for every frame (this can change when mice are added or removed, or if inference fails for some frames)
the datatype is an 8-bit unsigned integer
* "poseest/instance_embedding":
most applications can ignore this dataset. It has size (#frames x maximum # instances x #keypoints), where #keypoints is 12 following the indexing scheme shown below, and contains the instance embedding for the respective instance at each frame and point
the datatype is a 32-bit floating point
* "poseest/instance_track_id":
this is a dataset with size (#frames x maximum # instances) and contains the instance track ID for each instance index on a per-frame basis

The "poseest" group can have the following attributes attached:

* "cm_per_pixel" (optional):
defines how many centimeters a pixel of the open field represents
the datatype is a 32-bit floating point scalar
* "cm_per_pixel_source" (optional):
defines how the "cm_per_pixel" value was set. The value will be one of "corner_detection", "manually_set" or "default_alignment"
the datatype is a string scalar

The 12 keypoint indexes have the following mapping to mouse body parts:

* NOSE_INDEX = 0
* LEFT_EAR_INDEX = 1
* RIGHT_EAR_INDEX = 2
* BASE_NECK_INDEX = 3
* LEFT_FRONT_PAW_INDEX = 4
* RIGHT_FRONT_PAW_INDEX = 5
* CENTER_SPINE_INDEX = 6
* LEFT_REAR_PAW_INDEX = 7
* RIGHT_REAR_PAW_INDEX = 8
* BASE_TAIL_INDEX = 9
* MID_TAIL_INDEX = 10
* TIP_TAIL_INDEX = 11
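The per-frame masking described above can be sketched as follows. This example builds a tiny synthetic v3 file (the file name, shapes, and counts are illustrative, not output of the real tools) and then uses "poseest/instance_count" to select only the valid instances in each frame.

```python
import numpy as np
import h5py

# Write a tiny synthetic v3 pose file so this sketch is self-contained
# (a real file comes from tools/infermultimousepose.py).
n_frames, max_instances, n_keypoints = 4, 3, 12
with h5py.File("example_pose_est_v3.h5", "w") as f:
    f.create_dataset(
        "poseest/points",
        data=np.zeros((n_frames, max_instances, n_keypoints, 2),
                      dtype=np.uint16))
    f.create_dataset(
        "poseest/confidence",
        data=np.ones((n_frames, max_instances, n_keypoints),
                     dtype=np.float32))
    # instance count varies per frame (e.g. a mouse removed at frame 2)
    f.create_dataset("poseest/instance_count",
                     data=np.array([2, 2, 1, 2], dtype=np.uint8))

with h5py.File("example_pose_est_v3.h5", "r") as f:
    points = f["poseest/points"][:]          # (#frames, max inst, 12, 2), (y, x)
    confidence = f["poseest/confidence"][:]  # 0 marks a missing point
    instance_count = f["poseest/instance_count"][:]

# Only the first instance_count[frame] instances are valid in each frame
valid_points_per_frame = []
for frame in range(n_frames):
    n_valid = int(instance_count[frame])
    frame_points = points[frame, :n_valid]
    # confidence > 0 can be treated as a binary "point present" mask
    present = confidence[frame, :n_valid] > 0
    valid_points_per_frame.append(frame_points[present])

print(points.shape, int(instance_count.sum()))
```

Note that v3 stores points as (y, x) rather than the (x, y) order of the v2 format, so the last axis must be handled accordingly.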

## Licensing

This code is released under the MIT license.

The data used to train the models in the associated paper are released on Zenodo under a non-commercial license.
127 changes: 127 additions & 0 deletions experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml
@@ -0,0 +1,127 @@
AUTO_RESUME: true
CUDNN:
  BENCHMARK: true
  DETERMINISTIC: false
  ENABLED: true
DATA_DIR: ''
GPUS: (0,1,2,3)
OUTPUT_DIR: 'output'
LOG_DIR: 'log'
WORKERS: 24
PRINT_FREQ: 100

DATASET:
  COLOR_RGB: true
  DATASET: 'coco'
  DATA_FORMAT: jpg
  FLIP: true
  NUM_JOINTS_HALF_BODY: 8
  PROB_HALF_BODY: 0.3
  ROOT: 'data/coco/'
  ROT_FACTOR: 45
  SCALE_FACTOR: 0.35
  TEST_SET: 'val2017'
  TRAIN_SET: 'train2017'
MODEL:
  INIT_WEIGHTS: true
  NAME: pose_hrnet
  NUM_JOINTS: 17
  PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth'
  TARGET_TYPE: gaussian
  IMAGE_SIZE:
  - 192
  - 256
  HEATMAP_SIZE:
  - 48
  - 64
  SIGMA: 2
  EXTRA:
    PRETRAINED_LAYERS:
    - 'conv1'
    - 'bn1'
    - 'conv2'
    - 'bn2'
    - 'layer1'
    - 'transition1'
    - 'stage2'
    - 'transition2'
    - 'stage3'
    - 'transition3'
    - 'stage4'
    FINAL_CONV_KERNEL: 1
    STAGE2:
      NUM_MODULES: 1
      NUM_BRANCHES: 2
      BLOCK: BASIC
      NUM_BLOCKS:
      - 4
      - 4
      NUM_CHANNELS:
      - 32
      - 64
      FUSE_METHOD: SUM
    STAGE3:
      NUM_MODULES: 4
      NUM_BRANCHES: 3
      BLOCK: BASIC
      NUM_BLOCKS:
      - 4
      - 4
      - 4
      NUM_CHANNELS:
      - 32
      - 64
      - 128
      FUSE_METHOD: SUM
    STAGE4:
      NUM_MODULES: 3
      NUM_BRANCHES: 4
      BLOCK: BASIC
      NUM_BLOCKS:
      - 4
      - 4
      - 4
      - 4
      NUM_CHANNELS:
      - 32
      - 64
      - 128
      - 256
      FUSE_METHOD: SUM
LOSS:
  USE_TARGET_WEIGHT: true
TRAIN:
  BATCH_SIZE_PER_GPU: 32
  SHUFFLE: true
  BEGIN_EPOCH: 0
  END_EPOCH: 210
  OPTIMIZER: adam
  LR: 0.001
  LR_FACTOR: 0.1
  LR_STEP:
  - 170
  - 200
  WD: 0.0001
  GAMMA1: 0.99
  GAMMA2: 0.0
  MOMENTUM: 0.9
  NESTEROV: false
TEST:
  BATCH_SIZE_PER_GPU: 32
  COCO_BBOX_FILE: 'data/coco/person_detection_results/COCO_val2017_detections_AP_H_56_person.json'
  BBOX_THRE: 1.0
  IMAGE_THRE: 0.0
  IN_VIS_THRE: 0.2
  MODEL_FILE: ''
  NMS_THRE: 1.0
  OKS_THRE: 0.9
  USE_GT_BBOX: true
  FLIP_TEST: true
  POST_PROCESS: true
  SHIFT_HEATMAP: true
DEBUG:
  DEBUG: true
  SAVE_BATCH_IMAGES_GT: true
  SAVE_BATCH_IMAGES_PRED: true
  SAVE_HEATMAPS_GT: true
  SAVE_HEATMAPS_PRED: true
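As a quick sanity check on the config structure, a relevant excerpt can be parsed with PyYAML. This is a sketch only; the excerpt is embedded as a string here for illustration, and assumes `pyyaml` is installed. It confirms the 4x downsampling between input resolution and heatmap resolution implied by IMAGE_SIZE and HEATMAP_SIZE.

```python
import yaml

# Excerpt of the experiment config, embedded here for illustration
config_text = """
MODEL:
  IMAGE_SIZE:
  - 192
  - 256
  HEATMAP_SIZE:
  - 48
  - 64
  SIGMA: 2
"""

cfg = yaml.safe_load(config_text)
w, h = cfg["MODEL"]["IMAGE_SIZE"]
hw, hh = cfg["MODEL"]["HEATMAP_SIZE"]
# the predicted heatmaps are at 1/4 of the input resolution
print(w // hw, h // hh)  # → 4 4
```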