[Feature] Refine DETA and H-Deformable-DETR project (#235)

* add deta specific train_net * update config and readme * refine readme * refine config * refine readme * refine version * refine H-Deform.DETR * add arxiv links * refine model zoo * release new result * refine h-detr * refine README
IDEA-Research · Mar 19, 2023 · cfd2b1e · cfd2b1e
1 parent 0f7e6a8
commit cfd2b1e
Show file tree

Hide file tree

Showing 13 changed files with 692 additions and 50 deletions.
diff --git a/README.md b/README.md
@@ -112,7 +112,7 @@ Results and models are available in [model zoo](https://detrex.readthedocs.io/en
 - [x] [DN-DETR (CVPR'2022 Oral)](./projects/dn_detr/)
 - [x] [DN-Deformable-DETR (CVPR'2022 Oral)](./projects/dn_deformable_detr/)
 - [x] [Group-DETR (ArXiv'2022)](./projects/group_detr/)
-- [x] [NMS strikes back (ArXiv'2022)](./projects/deta/)
+- [x] [DETA (ArXiv'2022)](./projects/deta/)
 - [x] [DINO (ICLR'2023)](./projects/dino/)
 - [x] [H-Deformable-DETR (CVPR'2023)](./projects/h_deformable_detr/)
 - [x] [MaskDINO (CVPR'2023)](./projects/maskdino/)

diff --git a/docs/source/tutorials/Model_Zoo.md b/docs/source/tutorials/Model_Zoo.md
@@ -520,63 +520,71 @@ Here we provides our pretrained baselines with **detrex**. And more pretrained w
 <th valign="bottom">box<br/>AP</th>
 <th valign="bottom">Download</th>
 <!-- TABLE BODY -->
- <tr><td align="left"> <a href="https://github.com/IDEA-Research/detrex/blob/main/projects/h_deformable_detr/configs/h_deformable_detr_r50_two_stage_12ep.py"> H-Deformable-DETR-R50 + tricks </a> </td>
+ <tr><td align="left"> <a href="https://github.com/IDEA-Research/detrex/blob/main/projects/h_deformable_detr/configs/h_deformable_detr_r50_two_stage_12ep.py"> H-Deformable-DETR-R50 + tricks (detrex) </a> </td>
+<td align="center">R50</td>
+<td align="center">IN1k</td>
+<td align="center">300</td>
+<td align="center">12</td>
+<td align="center">49.1</td>
+<td align="center"> <a href="https://github.com/IDEA-Research/detrex-storage/releases/download/v0.4.0/h_deformable_detr_r50_two_stage_12ep_modified_train_net.pth"> model </a></td>
+</tr>
+ <tr><td align="left"> <a href="https://github.com/IDEA-Research/detrex/blob/main/projects/h_deformable_detr/configs/h_deformable_detr_r50_two_stage_12ep.py"> H-Deformable-DETR-R50 + tricks (converted) </a> </td>
 <td align="center">R50</td>
 <td align="center">IN1k</td>
 <td align="center">300</td>
 <td align="center">12</td>
 <td align="center">48.9</td>
 <td align="center"> <a href="https://github.com/IDEA-Research/detrex-storage/releases/download/v0.2.0/r50_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage_12eps.pth"> model </a></td>
 </tr>
- <tr><td align="left"> <a href="https://github.com/IDEA-Research/detrex/blob/main/projects/h_deformable_detr/configs/h_deformable_detr_r50_two_stage_36ep.py"> H-Deformable-DETR-R50 + tricks </a> </td>
+ <tr><td align="left"> <a href="https://github.com/IDEA-Research/detrex/blob/main/projects/h_deformable_detr/configs/h_deformable_detr_r50_two_stage_36ep.py"> H-Deformable-DETR-R50 + tricks (converted) </a> </td>
 <td align="center">R50</td>
 <td align="center">IN1k</td>
 <td align="center">300</td>
 <td align="center">36</td>
 <td align="center">50.3</td>
 <td align="center"> <a href="https://github.com/IDEA-Research/detrex-storage/releases/download/v0.2.0/r50_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage_36eps.pth"> model </a></td>
 </tr>
- <tr><td align="left"> <a href="https://github.com/IDEA-Research/detrex/blob/main/projects/h_deformable_detr/configs/h_deformable_detr_swin_tiny_two_stage_12ep.py"> H-Deformable-DETR-Swin-T + tricks </a> </td>
+ <tr><td align="left"> <a href="https://github.com/IDEA-Research/detrex/blob/main/projects/h_deformable_detr/configs/h_deformable_detr_swin_tiny_two_stage_12ep.py"> H-Deformable-DETR-Swin-T + tricks (converted) </a> </td>
 <td align="center">Swin-Tiny</td>
 <td align="center">IN1k</td>
 <td align="center">300</td>
 <td align="center">12</td>
 <td align="center">50.6</td>
 <td align="center"> <a href="https://github.com/IDEA-Research/detrex-storage/releases/download/v0.2.0/swin_tiny_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage_12eps.pth"> model </a></td>
 </tr>
- <tr><td align="left"> <a href="https://github.com/IDEA-Research/detrex/blob/main/projects/h_deformable_detr/configs/h_deformable_detr_swin_tiny_two_stage_36ep.py"> H-Deformable-DETR-Swin-T + tricks </a> </td>
+ <tr><td align="left"> <a href="https://github.com/IDEA-Research/detrex/blob/main/projects/h_deformable_detr/configs/h_deformable_detr_swin_tiny_two_stage_36ep.py"> H-Deformable-DETR-Swin-T + tricks (converted) </a> </td>
 <td align="center">Swin-Tiny</td>
 <td align="center">IN1k</td>
 <td align="center">300</td>
 <td align="center">36</td>
 <td align="center">53.5</td>
 <td align="center"> <a href="https://github.com/IDEA-Research/detrex-storage/releases/download/v0.2.0/swin_tiny_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage_36eps.pth"> model </a></td>
 </tr>
- <tr><td align="left"> <a href="https://github.com/IDEA-Research/detrex/blob/main/projects/h_deformable_detr/configs/h_deformable_detr_swin_large_two_stage_12ep.py"> H-Deformable-DETR-Swin-L + tricks </a> </td>
+ <tr><td align="left"> <a href="https://github.com/IDEA-Research/detrex/blob/main/projects/h_deformable_detr/configs/h_deformable_detr_swin_large_two_stage_12ep.py"> H-Deformable-DETR-Swin-L + tricks (converted) </a> </td>
 <td align="center">Swin-Large</td>
 <td align="center">IN22k</td>
 <td align="center">300</td>
 <td align="center">12</td>
 <td align="center">56.2</td>
 <td align="center"> <a href="https://github.com/IDEA-Research/detrex-storage/releases/download/v0.2.0/swin_large_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage_12eps.pth"> model </a></td>
 </tr>
- <tr><td align="left"> <a href="https://github.com/IDEA-Research/detrex/blob/main/projects/h_deformable_detr/configs/h_deformable_detr_swin_large_two_stage_36ep.py"> H-Deformable-DETR-Swin-L + tricks </a> </td>
+ <tr><td align="left"> <a href="https://github.com/IDEA-Research/detrex/blob/main/projects/h_deformable_detr/configs/h_deformable_detr_swin_large_two_stage_36ep.py"> H-Deformable-DETR-Swin-L + tricks (converted) </a> </td>
 <td align="center">Swin-Large</td>
 <td align="center">IN22k</td>
 <td align="center">300</td>
 <td align="center">36</td>
 <td align="center">57.5</td>
 <td align="center"> <a href="https://github.com/IDEA-Research/detrex-storage/releases/download/v0.2.0/drop_path0.5_swin_large_hybrid_branch_lambda1_group6_t1500_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage_36eps.pth"> model </a></td>
 </tr>
- <tr><td align="left"> <a href="https://github.com/IDEA-Research/detrex/blob/main/projects/h_deformable_detr/configs/h_deformable_detr_swin_large_two_stage_12ep_900queries.py"> H-Deformable-DETR-Swin-L + tricks </a> </td>
+ <tr><td align="left"> <a href="https://github.com/IDEA-Research/detrex/blob/main/projects/h_deformable_detr/configs/h_deformable_detr_swin_large_two_stage_12ep_900queries.py"> H-Deformable-DETR-Swin-L + tricks (converted) </a> </td>
 <td align="center">Swin-Large</td>
 <td align="center">IN22k</td>
 <td align="center">900</td>
 <td align="center">12</td>
 <td align="center">56.4</td>
 <td align="center"> <a href="https://github.com/IDEA-Research/detrex-storage/releases/download/v0.2.0/swin_large_hybrid_branch_lambda1_group6_t1500_n900_dp0_mqs_lft_deformable_detr_plus_iterative_bbox_refinement_plus_plus_two_stage_12eps.pth"> model </a></td>
 </tr>
- <tr><td align="left"> <a href="https://github.com/IDEA-Research/detrex/blob/main/projects/h_deformable_detr/configs/h_deformable_detr_swin_large_two_stage_36ep_900queries.py"> H-Deformable-DETR-Swin-L + tricks </a> </td>
+ <tr><td align="left"> <a href="https://github.com/IDEA-Research/detrex/blob/main/projects/h_deformable_detr/configs/h_deformable_detr_swin_large_two_stage_36ep_900queries.py"> H-Deformable-DETR-Swin-L + tricks (converted) </a> </td>
 <td align="center">Swin-Large</td>
 <td align="center">IN22k</td>
 <td align="center">300</td>
@@ -598,23 +606,44 @@ Here we provides our pretrained baselines with **detrex**. And more pretrained w
 <th valign="bottom">box<br/>AP</th>
 <th valign="bottom">Download</th>
 <!-- TABLE BODY -->
- <tr><td align="left"><a href="https://github.com/IDEA-Research/detrex/blob/main/projects/deta/configs/improved_deformable_detr_baseline_50ep.py">Improved-Deformable-DETR-R50-50ep</a></td>
+ <tr><td align="left"><a href="https://github.com/IDEA-Research/detrex/blob/main/projects/deta/configs/improved_deformable_detr_baseline_50ep.py">Improved-Deformable-DETR-R50 (converted)</a></td>
 <td align="center">R-50</td>
 <td align="center">IN1k</td>
 <td align="center">50</td>
 <td align="center">49.8</td>
 <td align="center"> <a href="https://github.com/IDEA-Research/detrex-storage/releases/download/v0.3.0/converted_deta_improved_deformable_baseline.pth">model</a></td>
 </tr>
- <tr><td align="left"><a href="https://github.com/IDEA-Research/detrex/blob/main/projects/deta/configs/deta_r50_5scale_12ep.py">DETA-R50-5scale-12ep</a></td>
+ <tr><td align="left"><a href="https://github.com/IDEA-Research/detrex/blob/main/projects/deta/configs/deta_r50_5scale_12ep_bs8.py">DETA-R50-5scale (bs=8, 180000 iterations)</a></td>
+<td align="center">R-50</td>
+<td align="center">IN1k</td>
+<td align="center">12</td>
+<td align="center">50.0</td>
+<td align="center"> <a href="https://github.com/IDEA-Research/detrex-storage/releases/download/v0.4.0/deta_r50_5scale_12ep_bs8.pth">model</a></td>
+</tr>
+ <tr><td align="left"><a href="https://github.com/IDEA-Research/detrex/blob/main/projects/deta/configs/deta_r50_5scale_12ep.py">DETA-R50-5scale (with hacked train engine)</a></td>
+<td align="center">R-50</td>
+<td align="center">IN1k</td>
+<td align="center">12</td>
+<td align="center">49.9</td>
+<td align="center"> <a href="https://github.com/IDEA-Research/detrex-storage/releases/download/v0.4.0/deta_r50_5scale_12ep_hacked_trainer.pth">model</a></td>
+</tr>
+ <tr><td align="left"><a href="https://github.com/IDEA-Research/detrex/blob/main/projects/deta/configs/deta_r50_5scale_no_frozen_backbone.py">DETA-R50-5scale-12ep (no frozen backbone)</a></td>
+<td align="center">R-50</td>
+<td align="center">IN1k</td>
+<td align="center">12</td>
+<td align="center">50.2</td>
+<td align="center"> <a href="https://github.com/IDEA-Research/detrex-storage/releases/download/v0.4.0/deta_r50_5scale_12ep_no_freeze_backbone.pth">model</a></td>
+</tr>
+ <tr><td align="left"><a href="https://github.com/IDEA-Research/detrex/blob/main/projects/deta/configs/deta_r50_5scale_12ep.py">DETA-R50-5scale (converted)</a></td>
 <td align="center">R-50</td>
 <td align="center">IN1k</td>
 <td align="center">12</td>
 <td align="center">50.1</td>
 <td align="center"> <a href="https://github.com/IDEA-Research/detrex-storage/releases/download/v0.3.0/converted_deta_r50_5scale_12ep.pth">model</a></td>
 </tr>
- <tr><td align="left"><a href="https://github.com/IDEA-Research/detrex/blob/main/projects/deta/configs/deta_r50_5scale_12ep.py">DETA-Swin-Large-finetune</a></td>
+ <tr><td align="left"><a href="https://github.com/IDEA-Research/detrex/blob/main/projects/deta/configs/deta_r50_5scale_12ep.py">DETA-Swin-Large-finetune (converted)</a></td>
 <td align="center">Swin-Large-384</td>
-<td align="center">IN1k</td>
+<td align="center">Objects365</td>
 <td align="center">24</td>
 <td align="center">62.9</td>
 <td align="center"> <a href="https://github.com/IDEA-Research/detrex-storage/releases/download/v0.3.0/converted_deta_swin_o365_finetune.pth">model</a></td>

diff --git a/projects/deta/README.md b/projects/deta/README.md
@@ -1,4 +1,4 @@
-## Anchor DETR: Query Design for Transformer-Based Object Detection
+## NMS strikes back
 
 Jeffrey Ouyang-Zhang, Jang Hyun Cho, Xingyi Zhou, Philipp Krähenbühl
 
@@ -8,6 +8,47 @@ Jeffrey Ouyang-Zhang, Jang Hyun Cho, Xingyi Zhou, Philipp Krähenbühl
   <img src="./assets/deta.png"/>
 </div><br/>
 
+## Pretrained Weights
+Here we provide our pretrained DETA model based on detrex.
+<table><tbody>
+<!-- START TABLE -->
+<!-- TABLE HEADER -->
+<th valign="bottom">Name</th>
+<th valign="bottom">Backbone</th>
+<th valign="bottom">Pretrain</th>
+<th valign="bottom">Epochs</th>
+<th valign="bottom">box<br/>AP</th>
+<th valign="bottom">download</th>
+<!-- TABLE BODY -->
+ <tr><td align="left"><a href="configs/deta_r50_5scale_12ep_bs8.py">DETA-R50-5scale-12ep (bs=8)</a></td>
+<td align="center">R-50</td>
+<td align="center">IN1k</td>
+<td align="center">12</td>
+<td align="center">50.0</td>
+<td align="center"> <a href="https://github.com/IDEA-Research/detrex-storage/releases/download/v0.4.0/deta_r50_5scale_12ep_bs8.pth">model</a></td>
+</tr>
+<!-- TABLE BODY -->
+ <tr><td align="left"><a href="configs/deta_r50_5scale_12ep.py">DETA-R50-5scale-12ep (with hacked trainer and bs=16)</a></td>
+<td align="center">R-50</td>
+<td align="center">IN1k</td>
+<td align="center">12</td>
+<td align="center">49.9</td>
+<td align="center"> <a href="https://github.com/IDEA-Research/detrex-storage/releases/download/v0.4.0/deta_r50_5scale_12ep_hacked_trainer.pth">model</a></td>
+</tr>
+ <tr><td align="left"><a href="configs/deta_r50_5scale_no_frozen_backbone.py">DETA-R50-5scale-12ep (no frozen backbone)</a></td>
+<td align="center">R-50</td>
+<td align="center">IN1k</td>
+<td align="center">12</td>
+<td align="center">50.2</td>
+<td align="center"> <a href="https://github.com/IDEA-Research/detrex-storage/releases/download/v0.4.0/deta_r50_5scale_12ep_no_freeze_backbone.pth">model</a></td>
+</tr>
+</tbody></table>
+
+- To train the DETA model with `batch_size=1` per GPU, we recommend using the default [train_net.py](https://github.com/IDEA-Research/detrex/blob/main/tools/train_net.py) to reproduce the results.
+- We also align the training hyper-parameters in the modified [train_net.py](./train_net.py) by hacking the optimizer.
+
+**Notable facts and caveats**: When using the detrex default trainer with `optimizer.lr=1e-4`, it is better to train the DETA model with `dataloader.train.total_batch_size=8` for 180000 iterations, which achieves 50.0 AP. Training with `dataloader.train.total_batch_size=16` for 90000 iterations may only reach 49.4 AP; we suspect that hyper-parameter differences between our implementation and the official repo influence the results.
+
 ## Converted Weights
 <table><tbody>
 <!-- START TABLE -->
@@ -45,12 +86,20 @@ Jeffrey Ouyang-Zhang, Jang Hyun Cho, Xingyi Zhou, Philipp Krähenbühl
 **Note:** Here we borrowed the pretrained weight from [DETA](https://github.com/jozhang97/DETA) official repo. And our detrex training results will be released in the future version.
 
 ## Training
-Training DETA-R50 model:
+**We recommend training DETA with the hacked trainer:**
+
+```bash
+# 49.9 - 50.0AP
+cd detrex
+python projects/deta/train_net.py --config-file projects/deta/configs/deta_r50_5scale_12ep.py --num-gpus 8
+```
+
+**Alternatively, you can use the default training engine with bs=8 to get the 50.0 AP result:**
 ```bash
+# 50.0 AP
 cd detrex
-python tools/train_net.py --config-file projects/anchor_detr/configs/anchor_detr_r50_50ep.py --num-gpus 8
+python tools/train_net.py --config-file projects/deta/configs/deta_r50_5scale_12ep_bs8.py --num-gpus 8
 ```
-By default, we use 8 GPUs with total batch size as 64 for training.
 
 ## Evaluation
 Model evaluation can be done as follows:

diff --git a/projects/deta/configs/deta_r50_5scale_12ep.py b/projects/deta/configs/deta_r50_5scale_12ep.py
@@ -4,7 +4,6 @@
 
 # using the default optimizer and dataloader
 dataloader = get_config("common/data/coco_detr.py").dataloader
-optimizer = get_config("common/optim.py").AdamW
 train = get_config("common/train.py").train
 
 # modify training config
@@ -13,31 +12,13 @@
 
 # max training iterations
 train.max_iter = 90000
-
-# run evaluation every epoch (about 7500 iters)
 train.eval_period = 7500
-
-# log training infomation every 20 iters
-train.log_period = 20
-
-# save checkpoint every epoch (about 7500 iters)
 train.checkpointer.period = 7500
 
-# gradient clipping for training
-train.clip_grad.enabled = True
-train.clip_grad.params.max_norm = 0.1
-train.clip_grad.params.norm_type = 2
-
 # set training devices
 train.device = "cuda"
 model.device = train.device
 
-# modify optimizer config
-optimizer.lr = 1e-4
-optimizer.betas = (0.9, 0.999)
-optimizer.weight_decay = 1e-4
-optimizer.params.lr_factor_func = lambda module_name: 0.1 if "backbone" in module_name else 1
-
 # modify dataloader config
 dataloader.train.num_workers = 16
 
@@ -46,5 +27,3 @@
 # each gpu is 16/4 = 4
 dataloader.train.total_batch_size = 16
 
-# dump the testing results into output_dir for visualization
-dataloader.evaluator.output_dir = train.output_dir
diff --git a/projects/deta/configs/deta_r50_5scale_12ep_bs8.py b/projects/deta/configs/deta_r50_5scale_12ep_bs8.py
@@ -13,14 +13,14 @@
 
 # max training iterations
 train.max_iter = 180000
-
-# run evaluation every epoch (about 7500 iters)
 train.eval_period = 15000
-
-# save checkpoint every epoch (about 7500 iters)
 train.checkpointer.period = 15000
 
 
+# only freeze stem during training
+model.backbone.freeze_at = 1 
+
+
 # modify optimizer config
 optimizer.lr = 1e-4
 optimizer.betas = (0.9, 0.999)
@@ -32,5 +32,3 @@
 # each gpu is 16/4 = 4
 dataloader.train.total_batch_size = 8
 
-# dump the testing results into output_dir for visualization
-dataloader.evaluator.output_dir = train.output_dir
diff --git a/projects/deta/configs/deta_r50_5scale_no_frozen_backbone.py b/projects/deta/configs/deta_r50_5scale_no_frozen_backbone.py
@@ -0,0 +1,8 @@
+from .deta_r50_5scale_12ep import (
+    model,
+    train,
+    dataloader,
+    lr_multiplier
+)
+
+model.backbone.freeze_at = 1
diff --git a/projects/deta/configs/models/deta_r50.py b/projects/deta/configs/models/deta_r50.py
@@ -25,7 +25,7 @@
             norm="FrozenBN",
         ),
         out_features=["res3", "res4", "res5"],
-        freeze_at=1,
+        freeze_at=2,
     ),
     position_embedding=L(PositionEmbeddingSine)(
         num_pos_feats=128,