Skip to content

Commit

Permalink
First release of training code (TRI-ML#11)
Browse files Browse the repository at this point in the history
  • Loading branch information
VitorGuizilini-TRI authored May 12, 2020
1 parent fec6d0b commit f41342d
Show file tree
Hide file tree
Showing 350 changed files with 64,112 additions and 1,808 deletions.
75 changes: 57 additions & 18 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,43 +1,82 @@
# Copyright 2020 Toyota Research Institute. All rights reserved.
# Handy commands:
# - `make docker-build`: builds DOCKER_IMAGE (default: `packnet-sfm:latest`)
# Project / image naming. Every `?=` knob can be overridden from the command
# line or the environment, e.g. `make docker-build DOCKER_IMAGE=my-image:dev`.
PROJECT ?= packnet-sfm
WORKSPACE ?= /workspace/$(PROJECT)
DOCKER_IMAGE ?= ${PROJECT}:latest

# Defaults for the arguments handed to scripts/evaluate_depth.sh.
DEPTH_TYPE ?= None
CROP ?= None
SAVE_OUTPUT ?= None

PYTHON ?= python

# Shared-memory size passed to `docker run --shm-size`.
SHMSIZE ?= 444G
# NOTE(review): this default is a make variable only; `-e WANDB_MODE` below
# forwards the *host environment* value, so the default reaches the container
# only if WANDB_MODE is exported -- confirm whether `export WANDB_MODE` is wanted.
WANDB_MODE ?= run

# Options shared by every `nvidia-docker run` recipe.
# Each flag appears exactly once: a stale, hard-coded `--shm-size=444G` used to
# override ${SHMSIZE}, and the X11 socket mount was listed twice.
DOCKER_OPTS := \
    --name ${PROJECT} \
    --rm -it \
    --shm-size=${SHMSIZE} \
    -e AWS_DEFAULT_REGION \
    -e AWS_ACCESS_KEY_ID \
    -e AWS_SECRET_ACCESS_KEY \
    -e WANDB_API_KEY \
    -e WANDB_ENTITY \
    -e WANDB_MODE \
    -e HOST_HOSTNAME= \
    -e OMP_NUM_THREADS=1 -e KMP_AFFINITY="granularity=fine,compact,1,0" \
    -e OMPI_ALLOW_RUN_AS_ROOT=1 \
    -e OMPI_ALLOW_RUN_AS_ROOT_CONFIRM=1 \
    -e NCCL_DEBUG=VERSION \
    -e DISPLAY=${DISPLAY} \
    -e XAUTHORITY \
    -e NVIDIA_DRIVER_CAPABILITIES=all \
    -v ~/.aws:/root/.aws \
    -v /root/.ssh:/root/.ssh \
    -v ~/.cache:/root/.cache \
    -v /data:/data \
    -v /mnt/fsx/:/mnt/fsx \
    -v /dev/null:/dev/raw1394 \
    -v /tmp:/tmp \
    -v /tmp/.X11-unix/X0:/tmp/.X11-unix/X0 \
    -v /var/run/docker.sock:/var/run/docker.sock \
    -v ${PWD}:${WORKSPACE} \
    -w ${WORKSPACE} \
    --privileged \
    --ipc=host \
    --network=host

.PHONY: all clean docker-build
# GPU count, taken as the number of lines printed by `nvidia-smi -L`.
# Recursive (`=`) on purpose: the command only runs when the value is expanded.
NGPUS = $(shell nvidia-smi -L | wc -l)

# mpirun launcher prefix: starts $(NGPUS) processes on localhost and forwards
# the listed environment variables (-x) to each of them.
MPI_CMD = mpirun \
    -allow-run-as-root \
    -np $(NGPUS) \
    -H localhost:$(NGPUS) \
    -x MASTER_ADDR=127.0.0.1 \
    -x MASTER_PORT=23457 \
    -x HOROVOD_TIMELINE \
    -x OMP_NUM_THREADS=1 \
    -x KMP_AFFINITY='granularity=fine,compact,1,0' \
    -bind-to none \
    -map-by slot \
    -x NCCL_DEBUG=INFO \
    -x NCCL_MIN_NRINGS=4 \
    --report-bindings


# None of these targets produce a file of the same name, so mark them phony;
# otherwise a stray file called e.g. `clean` or `docker-run` would disable them.
.PHONY: all clean docker-build docker-start-interactive docker-start-jupyter \
        docker-evaluate-depth docker-run docker-run-mpi

# Default goal: simply cleans the tree.
all: clean

# Remove Python bytecode files and __pycache__ directories below the current
# directory. `-exec ... {} +` hands paths straight to rm, so names containing
# whitespace are handled (the former `find | xargs` pipeline split on them);
# `-prune` stops find from descending into a directory it is about to delete.
clean:
	find . -name "*.pyc" -exec rm -f {} + && \
	find . -name "__pycache__" -prune -exec rm -rf {} +


# Build DOCKER_IMAGE from docker/Dockerfile, using the repository root as the
# build context. The arguments are given once; a leftover duplicate argument
# line previously terminated the command early.
docker-build:
	docker build \
		-f docker/Dockerfile \
		-t ${DOCKER_IMAGE} .

# Build the image, then open an interactive bash shell in a new container.
docker-start-interactive: docker-build
	nvidia-docker run $(DOCKER_OPTS) $(DOCKER_IMAGE) bash

# Build the image, then start a Jupyter notebook server inside the container,
# listening on port 8888 on all interfaces. Jupyter's option is spelled `--ip`
# (two dashes); the previous `-ip=0.0.0.0` is not a recognised option.
docker-start-jupyter: docker-build
	nvidia-docker run ${DOCKER_OPTS} ${DOCKER_IMAGE} \
		bash -c "jupyter notebook --port=8888 --ip=0.0.0.0 --allow-root --no-browser"

# Evaluate a depth model inside the container.
# MODEL and INPUT_PATH are supplied by the caller, e.g.
#   make docker-evaluate-depth MODEL=<checkpoint> INPUT_PATH=<data>
# NOTE(review): confirm scripts/evaluate_depth.sh still exists in this tree.
docker-evaluate-depth: docker-build
	nvidia-docker run ${DOCKER_OPTS} ${DOCKER_IMAGE} \
		bash -c "bash scripts/evaluate_depth.sh ${MODEL} ${INPUT_PATH} ${DEPTH_TYPE} ${CROP} ${SAVE_OUTPUT}"

# Run an arbitrary shell COMMAND inside the container, e.g.
#   make docker-run COMMAND="python3 some_script.py"
# Each target has its own recipe, so COMMAND is executed in the container
# rather than as a separate recipe line on the host.
docker-run: docker-build
	nvidia-docker run ${DOCKER_OPTS} ${DOCKER_IMAGE} \
		bash -c "${COMMAND}"

# Build the image, then run COMMAND inside the container, prefixed with the
# MPI_CMD launcher.
docker-run-mpi: docker-build
	nvidia-docker run $(DOCKER_OPTS) $(DOCKER_IMAGE) \
		bash -c "$(MPI_CMD) $(COMMAND)"
251 changes: 182 additions & 69 deletions README.md

Large diffs are not rendered by default.

184 changes: 184 additions & 0 deletions configs/default_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
"""Default packnet_sfm configuration parameters (overridable in configs/*.yaml)
"""

import os
from yacs.config import CfgNode as CN

########################################################################################################################
# Root configuration node (yacs CfgNode); every option below is attached to it.
cfg = CN()
cfg.name = '' # Run name
cfg.debug = False # Debugging flag
########################################################################################################################
### ARCH
########################################################################################################################
cfg.arch = CN()
cfg.arch.seed = 42 # Random seed for Pytorch/Numpy initialization
cfg.arch.min_epochs = 1 # Minimum number of epochs
cfg.arch.max_epochs = 50 # Maximum number of epochs
########################################################################################################################
### CHECKPOINT
########################################################################################################################
cfg.checkpoint = CN()
cfg.checkpoint.filepath = '' # Checkpoint filepath to save data
cfg.checkpoint.save_top_k = 5 # Number of best models to save
cfg.checkpoint.monitor = 'loss' # Metric to monitor for logging
cfg.checkpoint.monitor_index = 0 # Dataset index for the metric to monitor
cfg.checkpoint.mode = 'auto' # Automatically determine direction of improvement (increase or decrease)
cfg.checkpoint.s3_path = '' # s3 path for AWS model syncing
cfg.checkpoint.s3_frequency = 1 # How often to s3 sync
########################################################################################################################
### SAVE
########################################################################################################################
cfg.save = CN()
cfg.save.folder = '' # Folder where data will be saved
cfg.save.viz = True # Flag for saving inverse depth map visualization
cfg.save.npz = True # Flag for saving numpy depth maps
########################################################################################################################
### WANDB
########################################################################################################################
cfg.wandb = CN()
cfg.wandb.dry_run = True # Wandb dry-run (not logging)
cfg.wandb.name = '' # Wandb run name
cfg.wandb.project = os.environ.get("WANDB_PROJECT", "") # Wandb project (from $WANDB_PROJECT, empty if unset)
cfg.wandb.entity = os.environ.get("WANDB_ENTITY", "") # Wandb entity (from $WANDB_ENTITY, empty if unset)
cfg.wandb.tags = [] # Wandb tags
cfg.wandb.dir = '' # Wandb save folder
########################################################################################################################
### MODEL
########################################################################################################################
# Model-level options; optimizer, scheduler and evaluation parameters are sub-nodes.
cfg.model = CN()
cfg.model.name = '' # Training model
cfg.model.checkpoint_path = '' # Checkpoint path for model saving
########################################################################################################################
### MODEL.OPTIMIZER
########################################################################################################################
cfg.model.optimizer = CN()
cfg.model.optimizer.name = 'Adam' # Optimizer name
# Depth and pose each have their own learning rate and weight decay
cfg.model.optimizer.depth = CN()
cfg.model.optimizer.depth.lr = 0.0002 # Depth learning rate
cfg.model.optimizer.depth.weight_decay = 0.0 # Depth weight decay
cfg.model.optimizer.pose = CN()
cfg.model.optimizer.pose.lr = 0.0002 # Pose learning rate
cfg.model.optimizer.pose.weight_decay = 0.0 # Pose weight decay
########################################################################################################################
### MODEL.SCHEDULER
########################################################################################################################
cfg.model.scheduler = CN()
cfg.model.scheduler.name = 'StepLR' # Scheduler name
cfg.model.scheduler.step_size = 10 # Scheduler step size
cfg.model.scheduler.gamma = 0.5 # Scheduler gamma value
cfg.model.scheduler.T_max = 20 # Scheduler maximum number of iterations
########################################################################################################################
### MODEL.PARAMS
########################################################################################################################
cfg.model.params = CN()
cfg.model.params.crop = '' # Which crop should be used during evaluation
cfg.model.params.min_depth = 0.0 # Minimum depth value to evaluate
cfg.model.params.max_depth = 80.0 # Maximum depth value to evaluate
########################################################################################################################
### MODEL.LOSS
########################################################################################################################
# Loss options, grouped below by the bare `#` separator lines.
cfg.model.loss = CN()
#
cfg.model.loss.num_scales = 4 # Number of inverse depth scales to use
cfg.model.loss.progressive_scaling = 0.0 # Training percentage to decay number of scales
cfg.model.loss.flip_lr_prob = 0.5 # Probability of horizontal flipping
cfg.model.loss.rotation_mode = 'euler' # Rotation mode
cfg.model.loss.upsample_depth_maps = True # Resize depth maps to highest resolution
#
cfg.model.loss.ssim_loss_weight = 0.85 # SSIM loss weight
cfg.model.loss.occ_reg_weight = 0.1 # Occlusion regularizer loss weight
cfg.model.loss.smooth_loss_weight = 0.001 # Smoothness loss weight
cfg.model.loss.C1 = 1e-4 # SSIM parameter
cfg.model.loss.C2 = 9e-4 # SSIM parameter
cfg.model.loss.photometric_reduce_op = 'min' # Method for photometric loss reducing
cfg.model.loss.disp_norm = True # Inverse depth normalization
cfg.model.loss.clip_loss = 0.0 # Clip loss threshold variance
cfg.model.loss.padding_mode = 'zeros' # Photometric loss padding mode
cfg.model.loss.automask_loss = True # Automasking to remove static pixels
#
cfg.model.loss.supervised_method = 'sparse-l1' # Method for depth supervision
cfg.model.loss.supervised_num_scales = 4 # Number of scales for supervised learning
cfg.model.loss.supervised_loss_weight = 0.9 # Supervised loss weight
########################################################################################################################
### MODEL.DEPTH_NET
########################################################################################################################
# Depth network options (the pose network below uses the same four fields).
cfg.model.depth_net = CN()
cfg.model.depth_net.name = '' # Depth network name
cfg.model.depth_net.checkpoint_path = '' # Depth checkpoint filepath
cfg.model.depth_net.version = '' # Depth network version
cfg.model.depth_net.dropout = 0.0 # Depth network dropout
########################################################################################################################
### MODEL.POSE_NET
########################################################################################################################
cfg.model.pose_net = CN()
cfg.model.pose_net.name = '' # Pose network name
cfg.model.pose_net.checkpoint_path = '' # Pose checkpoint filepath
cfg.model.pose_net.version = '' # Pose network version
cfg.model.pose_net.dropout = 0.0 # Pose network dropout
########################################################################################################################
### DATASETS
########################################################################################################################
# Dataset options: shared augmentation settings plus one sub-node per split
# (train / validation / test). Per-dataset fields are lists.
cfg.datasets = CN()
########################################################################################################################
### DATASETS.AUGMENTATION
########################################################################################################################
cfg.datasets.augmentation = CN()
cfg.datasets.augmentation.image_shape = (192, 640) # Image shape
cfg.datasets.augmentation.jittering = (0.2, 0.2, 0.2, 0.05) # Color jittering values
########################################################################################################################
### DATASETS.TRAIN
########################################################################################################################
cfg.datasets.train = CN()
cfg.datasets.train.batch_size = 8 # Training batch size
cfg.datasets.train.num_workers = 16 # Training number of workers
cfg.datasets.train.back_context = 1 # Training backward context
cfg.datasets.train.forward_context = 1 # Training forward context
cfg.datasets.train.dataset = [] # Training dataset
cfg.datasets.train.path = [] # Training data path
cfg.datasets.train.split = [] # Training split
cfg.datasets.train.depth_type = [''] # Training depth type
cfg.datasets.train.cameras = [] # Training cameras
cfg.datasets.train.repeat = [1] # Number of times training dataset is repeated per epoch
cfg.datasets.train.num_logs = 5 # Number of training images to log
########################################################################################################################
### DATASETS.VALIDATION
########################################################################################################################
cfg.datasets.validation = CN()
cfg.datasets.validation.batch_size = 1 # Validation batch size
cfg.datasets.validation.num_workers = 8 # Validation number of workers
cfg.datasets.validation.back_context = 0 # Validation backward context
cfg.datasets.validation.forward_context = 0 # Validation forward context
cfg.datasets.validation.dataset = [] # Validation dataset
cfg.datasets.validation.path = [] # Validation data path
cfg.datasets.validation.split = [] # Validation split
cfg.datasets.validation.depth_type = [''] # Validation depth type
cfg.datasets.validation.cameras = [] # Validation cameras
cfg.datasets.validation.num_logs = 5 # Number of validation images to log
########################################################################################################################
### DATASETS.TEST
########################################################################################################################
cfg.datasets.test = CN()
cfg.datasets.test.batch_size = 1 # Test batch size
cfg.datasets.test.num_workers = 8 # Test number of workers
cfg.datasets.test.back_context = 0 # Test backward context
cfg.datasets.test.forward_context = 0 # Test forward context
cfg.datasets.test.dataset = [] # Test dataset
cfg.datasets.test.path = [] # Test data path
cfg.datasets.test.split = [] # Test split
cfg.datasets.test.depth_type = [''] # Test depth type
cfg.datasets.test.cameras = [] # Test cameras
cfg.datasets.test.num_logs = 5 # Number of test images to log
########################################################################################################################
### THESE SHOULD NOT BE CHANGED
########################################################################################################################
# Reserved keys (see the banner above: not meant to be set in user configs).
# NOTE(review): presumably filled in programmatically at run time -- confirm against the code that prepares the config.
cfg.config = '' # Run configuration file
cfg.default = '' # Run default configuration file
cfg.wandb.url = '' # Wandb URL
cfg.checkpoint.s3_url = '' # s3 URL
cfg.save.pretrained = '' # Pretrained checkpoint
cfg.prepared = False # Prepared flag
########################################################################################################################

def get_cfg_defaults():
    """Return a clone of the module-level default configuration node.

    The caller receives the result of ``cfg.clone()`` rather than the
    module-level ``cfg`` object itself.
    """
    defaults = cfg.clone()
    return defaults
25 changes: 25 additions & 0 deletions configs/eval_ddad.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Evaluation config: PackNet01 (version 1A) depth network on the DDAD 'val'
# split, camera_01, with lidar depth, evaluated up to 200 m.
model:
    name: "SelfSupModel"
    depth_net:
        name: "PackNet01"
        version: "1A"
    pose_net:
        name: "PoseNet"
        version: ""
    params:
        crop: ""
        min_depth: 0.0
        max_depth: 200.0
datasets:
    augmentation:
        image_shape: (384, 640)
    test:
        dataset: ["DGP"]
        path: ["/data/datasets/DDAD/ddad.json"]
        split: ["val"]
        depth_type: ["lidar"]
        cameras: ["camera_01"]
# Outputs (visualizations and .npz depth maps) go under this folder.
save:
    folder: "/data/save"
    viz: True
    npz: True
19 changes: 19 additions & 0 deletions configs/eval_image.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Inference config: PackNet01 (version 1A) depth network on a folder of images.
model:
    name: 'SelfSupModel'
    depth_net:
        name: 'PackNet01'
        version: '1A'
    pose_net:
        name: 'PoseNet'
        version: ''
datasets:
    augmentation:
        image_shape: (384, 640)
    test:
        dataset: ['Image']
        path: ['images']
        split: ['{:010d}']
save:
    folder: '/data/save'
    viz: True
    # The default config defines `save.npz` (there is no `save.npy` key), and
    # yacs rejects keys that are absent from the defaults when merging.
    npz: True
24 changes: 24 additions & 0 deletions configs/eval_kitti.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Evaluation config: PackNet01 (version 1A) depth network on the KITTI Eigen
# test files, velodyne depth, 'garg' crop, evaluated up to 80 m.
model:
    name: "SelfSupModel"
    depth_net:
        name: "PackNet01"
        version: "1A"
    pose_net:
        name: "PoseNet"
        version: ""
    params:
        crop: "garg"
        min_depth: 0.0
        max_depth: 80.0
datasets:
    augmentation:
        image_shape: (192, 640)
    test:
        dataset: ["KITTI"]
        path: ["/data/datasets/KITTI_raw"]
        split: ["data_splits/eigen_test_files.txt"]
        depth_type: ["velodyne"]
# Outputs (visualizations and .npz depth maps) go under this folder.
save:
    folder: "/data/save"
    viz: True
    npz: True
Loading

0 comments on commit f41342d

Please sign in to comment.