Commit

modelv2 & add remote sensing config
Winter-Jon committed Apr 26, 2024
1 parent 766c867 commit afe2344
Showing 23 changed files with 5,421 additions and 70 deletions.
889 changes: 889 additions & 0 deletions benchmark.py

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions benchmark.sh
@@ -0,0 +1,10 @@
OMP_NUM_THREADS=1 \
CUDA_VISIBLE_DEVICES="1" \
python benchmark.py --results-file benchmark.txt \
--model pacavit_tiny_p2cconv_100_0 \
--bench profile_deepspeed \
--num-bench-iter 100 \
--batch-size 128 --img-size 224 --num-classes 1000 \
--opt adamw --opt-eps 1e-8 --momentum 0.9 --weight-decay 0.05 \
--smoothing 0.1 --drop-path 0.1 \
--amp --channels-last
7 changes: 4 additions & 3 deletions configs/mixformer.yaml
@@ -1,11 +1,12 @@
# data
data_dir: ./datasets/IMNET1k
dataset: imagenet
num_classes: 1000
workers: 4
pin_mem: true

# model
-model: biformer_tiny
+model: mixformer_tiny
drop: 0.0
drop_path: 0.1

@@ -21,8 +22,8 @@ weight_decay: 0.05

# sched
sched: cosine
-lr_base: 1.0e-3
-min_lr: 1.0e-5
+lr_base: 5.0e-4
+min_lr: 5.0e-6
warmup_lr: 1.0e-06
warmup_epochs: 5
weight_decay: 2.0e-05
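The halved lr_base is not the LR the optimizer sees: with lr_base_scale: linear and lr_base_size: 512 (visible in the scene-recognition config below), timm-style training first scales it by the global batch size. A minimal sketch of that resolution, assuming this repo's main.py follows the convention of timm's train.py:

# Sketch of linear LR-base scaling (an assumption: this repo builds on
# timm, whose train.py resolves the effective LR this way).
def resolve_lr(lr_base: float, global_batch_size: int,
               lr_base_size: int = 512, scale: str = "linear") -> float:
    ratio = global_batch_size / lr_base_size
    if scale == "sqrt":
        ratio **= 0.5
    return lr_base * ratio

# One GPU at batch size 128: 5.0e-4 * 128 / 512 = 1.25e-4
print(resolve_lr(5.0e-4, 128))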
82 changes: 82 additions & 0 deletions configs/mixformer_scene_recognition.yaml
@@ -0,0 +1,82 @@
# data
data_dir: ./datasets/millionaid
dataset: torch/millionaid
num_classes: 51
workers: 4
pin_mem: true

# model
model: mixformer_tiny
drop: 0.0
drop_path: 0.1

# opt
epochs: 100
opt: adamw
opt_eps: 1e-8
opt_betas:
- 0.9
- 0.999
momentum: 0.9
weight_decay: 0.05

# sched
sched: cosine
lr_base: 5.0e-4
min_lr: 5.0e-6
warmup_lr: 1.0e-06
warmup_epochs: 2
weight_decay: 2.0e-05
lr_base_scale: linear
lr_base_size: 512
auto_scale_warmup_min_lr: True

# cosine sched
lr_cycle_decay: 0.5
lr_cycle_limit: 1
lr_cycle_mul: 1.0
lr_k_decay: 1.0
lr_noise: null
lr_noise_pct: 0.67
lr_noise_std: 1.0
cooldown_epochs: 0
# patience_epochs: 10


# amp
amp: true
amp_dtype: float16
amp_impl: native

# ema
model_ema: false
model_ema_decay: 0.99996
model_ema_force_cpu: false

# mixup
mixup: 0.8
cutmix: 1.0
cutmix_minmax: null
mixup_prob: 1.0
mixup_switch_prob: 0.5
mixup_mode: batch

# others
grad_accum_steps: 1
clip_grad: null
aa: rand-m9-mstd0.5-inc1
color_jitter: 0.4
decay_rate: 0.1
smoothing: 0.1
train_interpolation: bicubic
repeated_aug: true
reprob: 0.25
remode: pixel
recount: 1
resplit: false

# log
# summary: torchinfo
# benchmark: calflops


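main.py (further down in this commit) swaps timm's create_dataset for a local data.create_dataset, which is what lets dataset: torch/millionaid above resolve. That data module is among the 23 changed files but is not rendered in this view; a hypothetical sketch of the routing it would need, assuming MillionAID is stored ImageFolder-style with the 51 scene classes as subdirectories:

# Hypothetical sketch only: the commit's real data/create_dataset is not
# shown here. Routes the remote-sensing name, defers the rest to timm.
import os
from torchvision.datasets import ImageFolder
from timm.data import create_dataset as timm_create_dataset

def create_dataset(name, root, split="train", **kwargs):
    if name == "torch/millionaid":
        # <root>/<split>/<scene_class>/*.jpg, 51 classes per this config
        return ImageFolder(os.path.join(root, split))
    return timm_create_dataset(name, root=root, split=split, **kwargs)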
13 changes: 13 additions & 0 deletions debug.sh
@@ -10,3 +10,16 @@ CUDA_VISIBLE_DEVICES="1" \
--data-dir datasets/IMNET1k \
--img-size 224 \
--batch-size 100

# OMP_NUM_THREADS=1 \
# CUDA_VISIBLE_DEVICES="1" \
# python -m debugpy --listen localhost:5678 --wait-for-client \
# main.py \
# --config configs/mixformer.yaml \
# --output outputs/classification \
# --experiment exp1_debug \
# --resume outputs/classification/mixformer_tiny_224/exp1/checkpoint-77.pth.tar \
# --model mixformer_tiny \
# --data-dir datasets/IMNET1k \
# --img-size 224 \
# --batch-size 100
6 changes: 6 additions & 0 deletions engine.py
@@ -95,6 +95,9 @@ def _backward(_loss):
mode=args.clip_mode,
)
optimizer.step()

+# for name, param in model.named_parameters():
+# assert torch.isfinite(param).all() == True, f"Param {param} not a number"

if has_no_sync and not need_update:
with model.no_sync():
@@ -104,6 +107,9 @@ def _backward(_loss):
loss = _forward()
_backward(loss)

+# for name, param in model.named_parameters():
+# assert torch.isfinite(param.grad).all() == True, f"optim {param.grad} not a number"

if not args.distributed:
losses_m.update(loss.item() * accum_steps, input.size(0))
update_sample_count += input.size(0)
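The commented-out assertions added in the two hunks above are one-off NaN probes. A sketch of the same check as a reusable helper that can be toggled without editing the loop body (the helper name is illustrative, not part of this commit):

import torch

def assert_finite(model: torch.nn.Module, check_grads: bool = False) -> None:
    # Fail on the first parameter (or gradient) containing NaN/Inf,
    # reporting the tensor's name rather than dumping its values.
    for name, param in model.named_parameters():
        tensor = param.grad if check_grads else param
        if tensor is not None and not torch.isfinite(tensor).all():
            raise RuntimeError(f"non-finite values in {name}")

# after optimizer.step():  assert_finite(model)
# after _backward(loss):   assert_finite(model, check_grads=True)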
11 changes: 8 additions & 3 deletions main.py
@@ -27,7 +27,7 @@
from torch.nn.parallel import DistributedDataParallel as NativeDDP

from timm import utils
-from timm.data import create_dataset, create_loader, resolve_data_config, Mixup, FastCollateMixup, AugMixDataset
+from timm.data import create_loader, resolve_data_config, Mixup, FastCollateMixup, AugMixDataset
from timm.layers import convert_splitbn_model, convert_sync_batchnorm, set_fast_norm
from timm.loss import JsdCrossEntropy, SoftTargetCrossEntropy, BinaryCrossEntropy, LabelSmoothingCrossEntropy
from timm.models import create_model, safe_model_name, resume_checkpoint, load_checkpoint
@@ -37,6 +37,7 @@

from engine import train_one_epoch, validate
from models import *
+from data import create_dataset

import torchstat, torchinfo, torchsummary
from calflops import calculate_flops
@@ -78,7 +79,7 @@

def main():

-# region Initial
+# region Initial
args, args_text = _parse_args()

if torch.cuda.is_available():
@@ -103,7 +104,11 @@ def main():
str(data_config['input_size'][-1])
])


output_dir = utils.get_outdir(args.output if args.output else './output/train', exp_name, inc=not args.override)
if args.override:
shutil.rmtree(output_dir)
os.makedirs(output_dir)
log_dir = os.path.join(output_dir, "train_log.txt")

# modified ------>>>
@@ -325,7 +330,7 @@ def main():
else:
if utils.is_primary(args):
_logger.info("Using native Torch DistributedDataParallel.")
-model = NativeDDP(model, device_ids=[], broadcast_buffers=not args.no_ddp_bb)
+model = NativeDDP(model, device_ids=[device], broadcast_buffers=not args.no_ddp_bb)
# NOTE: EMA model does not need to be wrapped by DDP

if args.torchcompile:
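The device_ids change above follows the standard single-process-per-GPU DDP pattern: each process wraps the model with exactly the device it owns, rather than an empty list. A minimal sketch of the setup that line assumes (requires torch.distributed to be initialized first; names are illustrative):

import torch
from torch.nn.parallel import DistributedDataParallel as NativeDDP

def wrap_ddp(model: torch.nn.Module, local_rank: int,
             broadcast_buffers: bool = True) -> NativeDDP:
    # One process per GPU: move the model to this process's device and
    # pass that same device in device_ids, as the fixed line does.
    device = torch.device("cuda", local_rank)
    return NativeDDP(model.to(device), device_ids=[device],
                     broadcast_buffers=broadcast_buffers)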
